Commit 7cdd6d16 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1837 from peastman/cv

Created CustomCVForce
parents ad5cc98c 711c3a5a
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -91,6 +91,7 @@ CudaPlatform::CudaPlatform() { ...@@ -91,6 +91,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory); registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
...@@ -198,7 +199,23 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string ...@@ -198,7 +199,23 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads; stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads)); hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, NULL));
}
void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
Platform& platform = originalContext.getPlatform();
string devicePropValue = platform.getPropertyValue(originalContext.getOwner(), CudaDeviceIndex());
string blockingPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaUseBlockingSync());
string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaPrecision());
string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), CudaUseCpuPme());
string compilerPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaCompiler());
string tempPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaTempDirectory());
string hostCompilerPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaHostCompiler());
string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaDisablePmeStream());
string deterministicForcesValue = platform.getPropertyValue(originalContext.getOwner(), CudaDeterministicForces());
int threads = reinterpret_cast<PlatformData*>(originalContext.getPlatformData())->threads.getNumThreads();
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, &originalContext));
} }
void CudaPlatform::contextDestroyed(ContextImpl& context) const { void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -208,7 +225,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -208,7 +225,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty, const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads) : const string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) { context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
...@@ -218,16 +235,19 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys ...@@ -218,16 +235,19 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
PlatformData* originalData = NULL;
if (originalContext != NULL)
originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
try { try {
for (int i = 0; i < (int) devices.size(); i++) { for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) { if (devices[i].length() > 0) {
int deviceIndex; int deviceIndex;
stringstream(devices[i]) >> deviceIndex; stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this)); contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
} }
} }
if (contexts.size() == 0) if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this)); contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
} }
catch (...) { catch (...) {
// If an exception was thrown, do our best to clean up memory. // If an exception was thrown, do our best to clean up memory.
......
/**
* Copy the positions and velocities to the inner context.
*/
extern "C" __global__ void copyState(real4* posq, real4* posqCorrection, mixed4* velm, int* __restrict__ atomOrder,
real4* innerPosq, real4* innerPosqCorrection, mixed4* innerVelm, int* __restrict__ innerInvAtomOrder,
int numAtoms) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) {
int index = innerInvAtomOrder[atomOrder[i]];
innerPosq[index] = posq[i];
innerVelm[index] = velm[i];
#ifdef USE_MIXED_PRECISION
innerPosqCorrection[index] = posqCorrection[i];
#endif
}
}
/**
* Copy the forces back to the main context.
*/
extern "C" __global__ void copyForces(long long* forces, int* __restrict__ invAtomOrder, long long* innerForces,
int* __restrict__ innerAtomOrder, int numAtoms, int paddedNumAtoms) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) {
int index = invAtomOrder[innerAtomOrder[i]];
forces[index] = innerForces[i];
forces[index+paddedNumAtoms] = innerForces[i+paddedNumAtoms];
forces[index+paddedNumAtoms*2] = innerForces[i+paddedNumAtoms*2];
}
}
/**
* Add all the forces from the CVs.
*/
extern "C" __global__ void addForces(long long* forces, int bufferSize
PARAMETER_ARGUMENTS) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < bufferSize; i += blockDim.x*gridDim.x) {
ADD_FORCES
}
}
...@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res ...@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer(buffer6, size6); clearSingleBuffer(buffer6, size6);
} }
/**
* Sum the energy buffer.
*/
__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize)
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
*result = tempBuffer[0];
}
/** /**
* Record the atomic charges into the posq array. * Record the atomic charges into the posq array.
*/ */
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestCustomCVForce.h"
void runPlatformTests() {
}
...@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { ...@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -56,7 +56,7 @@ void testGaussian() { ...@@ -56,7 +56,7 @@ void testGaussian() {
system.addParticle(1.0); system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) { ...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
CudaArray data(context, array.size(), 4, "sortData"); CudaArray data(context, array.size(), 4, "sortData");
......
...@@ -163,7 +163,8 @@ public: ...@@ -163,7 +163,8 @@ public:
class ForcePostComputation; class ForcePostComputation;
static const int ThreadBlockSize; static const int ThreadBlockSize;
static const int TileSize; static const int TileSize;
OpenCLContext(const System& system, int platformIndex, int deviceIndex, const std::string& precision, OpenCLPlatform::PlatformData& platformData); OpenCLContext(const System& system, int platformIndex, int deviceIndex, const std::string& precision, OpenCLPlatform::PlatformData& platformData,
OpenCLContext* originalContext);
~OpenCLContext(); ~OpenCLContext();
/** /**
* This is called to initialize internal data structures after all Forces in the system * This is called to initialize internal data structures after all Forces in the system
...@@ -363,9 +364,13 @@ public: ...@@ -363,9 +364,13 @@ public:
*/ */
void reduceBuffer(OpenCLArray& array, int numBuffers); void reduceBuffer(OpenCLArray& array, int numBuffers);
/** /**
* Sum the buffesr containing forces. * Sum the buffers containing forces.
*/ */
void reduceForces(); void reduceForces();
/**
* Sum the buffer containing energy.
*/
double reduceEnergy();
/** /**
* Get the current simulation time. * Get the current simulation time.
*/ */
...@@ -749,6 +754,7 @@ private: ...@@ -749,6 +754,7 @@ private:
cl::Kernel clearSixBuffersKernel; cl::Kernel clearSixBuffersKernel;
cl::Kernel reduceReal4Kernel; cl::Kernel reduceReal4Kernel;
cl::Kernel reduceForcesKernel; cl::Kernel reduceForcesKernel;
cl::Kernel reduceEnergyKernel;
cl::Kernel setChargesKernel; cl::Kernel setChargesKernel;
std::vector<OpenCLForceInfo*> forces; std::vector<OpenCLForceInfo*> forces;
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
...@@ -763,6 +769,7 @@ private: ...@@ -763,6 +769,7 @@ private:
OpenCLArray* forceBuffers; OpenCLArray* forceBuffers;
OpenCLArray* longForceBuffer; OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer; OpenCLArray* energyBuffer;
OpenCLArray* energySum;
OpenCLArray* energyParamDerivBuffer; OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice; OpenCLArray* atomIndexDevice;
OpenCLArray* chargeBuffer; OpenCLArray* chargeBuffer;
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "openmm/internal/CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h" #include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include "openmm/System.h" #include "openmm/System.h"
namespace OpenMM { namespace OpenMM {
...@@ -1207,6 +1208,54 @@ private: ...@@ -1207,6 +1208,54 @@ private:
cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel, torqueKernel; cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel, torqueKernel;
}; };
/**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
cl(cl), hasInitializedKernels(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) {
}
~OpenCLCalcCustomCVForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void copyState(ContextImpl& context, ContextImpl& innerContext);
private:
class ReorderListener;
OpenCLContext& cl;
bool hasInitializedKernels;
Lepton::ExpressionProgram energyExpression;
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<OpenCLArray*> cvForces;
OpenCLArray* invAtomOrder;
OpenCLArray* innerInvAtomOrder;
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
};
/** /**
* This kernel is invoked by VerletIntegrator to take one time step. * This kernel is invoked by VerletIntegrator to take one time step.
*/ */
......
...@@ -53,6 +53,7 @@ public: ...@@ -53,6 +53,7 @@ public:
const std::string& getPropertyValue(const Context& context, const std::string& property) const; const std::string& getPropertyValue(const Context& context, const std::string& property) const;
void setPropertyValue(Context& context, const std::string& property, const std::string& value) const; void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const; void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
void linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const;
void contextDestroyed(ContextImpl& context) const; void contextDestroyed(ContextImpl& context) const;
/** /**
* This is the name of the parameter for selecting which OpenCL device or devices to use. * This is the name of the parameter for selecting which OpenCL device or devices to use.
...@@ -108,7 +109,7 @@ public: ...@@ -108,7 +109,7 @@ public:
class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData { class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData {
public: public:
PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty, PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads); const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads, ContextImpl* originalContext);
~PlatformData(); ~PlatformData();
void initializeContexts(const System& system); void initializeContexts(const System& system);
void syncContexts(); void syncContexts();
......
...@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
std::cerr << "OpenCL internal error: " << errinfo << std::endl; std::cerr << "OpenCL internal error: " << errinfo << std::endl;
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energySum(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL),
chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
...@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
vector<cl::Device> contextDevices; vector<cl::Device> contextDevices;
contextDevices.push_back(device); contextDevices.push_back(device);
cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0}; cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0};
context = cl::Context(contextDevices, cprops, errorCallback); if (originalContext == NULL) {
defaultQueue = cl::CommandQueue(context, device); context = cl::Context(contextDevices, cprops, errorCallback);
defaultQueue = cl::CommandQueue(context, device);
}
else {
context = originalContext->context;
defaultQueue = originalContext->defaultQueue;
}
currentQueue = defaultQueue; currentQueue = defaultQueue;
numAtoms = system.getNumParticles(); numAtoms = system.getNumParticles();
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize); paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
...@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer"); reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
if (supports64BitGlobalAtomics) if (supports64BitGlobalAtomics)
reduceForcesKernel = cl::Kernel(utilities, "reduceForces"); reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
reduceEnergyKernel = cl::Kernel(utilities, "reduceEnergy");
setChargesKernel = cl::Kernel(utilities, "setCharges"); setChargesKernel = cl::Kernel(utilities, "setCharges");
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use. // Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer; delete longForceBuffer;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (energySum != NULL)
delete energySum;
if (energyParamDerivBuffer != NULL) if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer; delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
...@@ -465,11 +474,19 @@ void OpenCLContext::initialize() { ...@@ -465,11 +474,19 @@ void OpenCLContext::initialize() {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
} }
else { else if (useMixedPrecision) {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
}
else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_float>(*this, energyBufferSize, "energyBuffer");
energySum = OpenCLArray::create<cl_float>(*this, 1, "energySum");
} }
if (supports64BitGlobalAtomics) { if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer"); longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
...@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) { ...@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel(reduceReal4Kernel, bufferSize, 128); executeKernel(reduceReal4Kernel, bufferSize, 128);
} }
double OpenCLContext::reduceEnergy() {
int workGroupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (workGroupSize > 512)
workGroupSize = 512;
reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer->getDeviceBuffer());
reduceEnergyKernel.setArg<cl::Buffer>(1, energySum->getDeviceBuffer());
reduceEnergyKernel.setArg<cl_int>(2, energyBuffer->getSize());
reduceEnergyKernel.setArg<cl_int>(3, workGroupSize);
reduceEnergyKernel.setArg(4, workGroupSize*energyBuffer->getElementSize(), NULL);
executeKernel(reduceEnergyKernel, workGroupSize, workGroupSize);
if (getUseDoublePrecision() || getUseMixedPrecision()) {
double energy;
energySum->download(&energy);
return energy;
}
else {
float energy;
energySum->download(&energy);
return energy;
}
}
void OpenCLContext::setCharges(const vector<double>& charges) { void OpenCLContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL) if (chargeBuffer == NULL)
chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer"); chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
......
...@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo ...@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name()) if (name == CalcCustomCompoundBondForceKernel::Name())
return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCVForceKernel::Name())
return new OpenCLCalcCustomCVForceKernel(name, platform, cl);
if (name == CalcCustomManyParticleForceKernel::Name()) if (name == CalcCustomManyParticleForceKernel::Name())
return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem()); return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGayBerneForceKernel::Name()) if (name == CalcGayBerneForceKernel::Name())
......
...@@ -139,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, ...@@ -139,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups); sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cl.reduceForces(); cl.reduceForces();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites(); cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) { if (includeEnergy)
OpenCLArray& energyArray = cl.getEnergyBuffer(); sum += cl.reduceEnergy();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
if (!cl.getForcesValid()) if (!cl.getForcesValid())
valid = false; valid = false;
return sum; return sum;
...@@ -6874,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6874,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
exclusionStartIndex->upload(startIndexVec); exclusionStartIndex->upload(startIndexVec);
} }
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLContext& cl, OpenCLArray& invAtomOrder) : cl(cl), invAtomOrder(invAtomOrder) {
}
void execute() {
vector<cl_int> invOrder(cl.getPaddedNumAtoms());
const vector<int>& order = cl.getAtomIndex();
for (int i = 0; i < order.size(); i++)
invOrder[order[i]] = i;
invAtomOrder.upload(invOrder);
}
private:
OpenCLContext& cl;
OpenCLArray& invAtomOrder;
};
OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
for (auto force : cvForces)
delete force;
if (invAtomOrder != NULL)
delete invAtomOrder;
if (innerInvAtomOrder != NULL)
delete innerInvAtomOrder;
}
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1));
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
for (int i = 0; i < numCVs; i++) {
string name = force.getCollectiveVariableName(i);
variableNames.push_back(name);
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
cl.addEnergyParameterDerivative(name);
}
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
for (auto& param : cl2.getEnergyParamDerivNames())
cl.addEnergyParameterDerivative(param);
// Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
for (int i = 0; i < numCVs; i++)
cvForces.push_back(new OpenCLArray(cl, cl.getNumAtoms(), 4*elementSize, "cvForce"));
invAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", __global real4* restrict force" << i << ", real dEdV" << i;
add << "f += force" << i << "[i]*dEdV" << i << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCVForce, replacements));
copyStateKernel = cl::Kernel(program, "copyState");
copyForcesKernel = cl::Kernel(program, "copyForces");
addForcesKernel = cl::Kernel(program, "addForces");
}
double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
copyState(context, innerContext);
int numCVs = variableNames.size();
int numAtoms = cl.getNumAtoms();
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
vector<double> cvValues;
vector<map<string, double> > cvDerivs(numCVs);
for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]->getDeviceBuffer());
cl.executeKernel(copyForcesKernel, numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
}
// Compute the energy and forces.
map<string, double> variables;
for (auto& name : globalParameterNames)
variables[name] = context.getParameter(name);
for (int i = 0; i < numCVs; i++)
variables[variableNames[i]] = cvValues[i];
double energy = energyExpression.evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
if (cl.getUseDoublePrecision())
addForcesKernel.setArg<cl_double>(2*i+3, dEdV);
else
addForcesKernel.setArg<cl_float>(2*i+3, dEdV);
}
cl.executeKernel(addForcesKernel, numAtoms);
// Compute the energy parameter derivatives.
map<string, double>& energyParamDerivs = cl.getEnergyParamDerivWorkspace();
for (int i = 0; i < paramDerivExpressions.size(); i++)
energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
for (auto& deriv : cvDerivs[i])
energyParamDerivs[deriv.first] += dEdV*deriv.second;
}
return energy;
}
void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
int numAtoms = cl.getNumAtoms();
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cl, *invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cl2, *innerInvAtomOrder);
cl.addReorderListener(listener1);
cl2.addReorderListener(listener2);
listener1->execute();
listener2->execute();
// Initialize the kernels.
copyStateKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder->getDeviceBuffer());
copyStateKernel.setArg<cl_int>(8, numAtoms);
if (cl.getUseMixedPrecision()) {
copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(5, cl2.getPosqCorrection().getDeviceBuffer());
}
else {
copyStateKernel.setArg<void*>(1, NULL);
copyStateKernel.setArg<void*>(5, NULL);
}
copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder->getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
copyForcesKernel.setArg<cl_int>(4, numAtoms);
addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
addForcesKernel.setArg<cl_int>(1, numAtoms);
for (int i = 0; i < cvForces.size(); i++)
addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i]->getDeviceBuffer());
}
cl.executeKernel(copyStateKernel, numAtoms);
Vec3 a, b, c;
context.getPeriodicBoxVectors(a, b, c);
innerContext.setPeriodicBoxVectors(a, b, c);
innerContext.setTime(context.getTime());
map<string, double> innerParameters = innerContext.getParameters();
for (auto& param : innerParameters)
innerContext.setParameter(param.first, context.getParameter(param.first));
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() { OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() { ...@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory); registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory); registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory); registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory); registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory); registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
...@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri ...@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
char* threadsEnv = getenv("OPENMM_CPU_THREADS"); char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL) if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads; stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue, pmeStreamPropValue, threads)); context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue,
pmeStreamPropValue, threads, NULL));
}
void OpenCLPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
Platform& platform = originalContext.getPlatform();
string platformPropValue = platform.getPropertyValue(originalContext.getOwner(), OpenCLPlatformIndex());
string devicePropValue = platform.getPropertyValue(originalContext.getOwner(), OpenCLDeviceIndex());
string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), OpenCLPrecision());
string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), OpenCLUseCpuPme());
string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), OpenCLDisablePmeStream());
int threads = reinterpret_cast<PlatformData*>(originalContext.getPlatformData())->threads.getNumThreads();
context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue,
pmeStreamPropValue, threads, &originalContext));
} }
void OpenCLPlatform::contextDestroyed(ContextImpl& context) const { void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
...@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
} }
OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty, OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty,
const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads) : const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads, ContextImpl* originalContext) :
removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) { removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
int platformIndex = -1; int platformIndex = -1;
if (platformPropValue.length() > 0) if (platformPropValue.length() > 0)
...@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p ...@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
PlatformData* originalData = NULL;
if (originalContext != NULL)
originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
try { try {
for (int i = 0; i < (int) devices.size(); i++) { for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) { if (devices[i].length() > 0) {
int deviceIndex; int deviceIndex;
stringstream(devices[i]) >> deviceIndex; stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this)); contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
} }
} }
if (contexts.size() == 0) if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this)); contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
} }
catch (...) { catch (...) {
// If an exception was thrown, do our best to clean up memory. // If an exception was thrown, do our best to clean up memory.
......
/**
* Copy the positions and velocities to the inner context.
*/
__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
__global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = innerInvAtomOrder[atomOrder[i]];
innerPosq[index] = posq[i];
innerVelm[index] = velm[i];
#ifdef USE_MIXED_PRECISION
innerPosqCorrection[index] = posqCorrection[i];
#endif
}
}
/**
* Copy the forces back to the main context.
*/
__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
__global int* restrict innerAtomOrder, int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = invAtomOrder[innerAtomOrder[i]];
forces[index] = innerForces[i];
}
}
/**
* Add all the forces from the CVs.
*/
__kernel void addForces(__global real4* forces, int numAtoms
PARAMETER_ARGUMENTS) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
real4 f = forces[i];
ADD_FORCES
forces[i] = f;
}
}
\ No newline at end of file
...@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re ...@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
} }
#endif #endif
/**
* Sum the energy buffer.
*/
__kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global mixed* restrict result, int bufferSize, int workGroupSize, __local mixed* tempBuffer) {
const unsigned int thread = get_local_id(0);
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += get_local_size(0))
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
barrier(CLK_LOCAL_MEM_FENCE);
if (thread%(i*2) == 0 && thread+i < workGroupSize)
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
*result = tempBuffer[0];
}
/** /**
* This is called to determine the accuracy of various native functions. * This is called to determine the accuracy of various native functions.
*/ */
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestCustomCVForce.h"
void runPlatformTests() {
}
...@@ -54,7 +54,7 @@ template <class Real2> ...@@ -54,7 +54,7 @@ template <class Real2>
void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -54,7 +54,7 @@ void testGaussian() { ...@@ -54,7 +54,7 @@ void testGaussian() {
System system; System system;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) { ...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData"); OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment