Unverified Commit edbc8407 authored by peastman's avatar peastman Committed by GitHub
Browse files

Common compute framework to unify CUDA and OpenCL code (#2488)

* Began creating common compute framework to unify code between CUDA and OpenCL

* Began OpenCL implementation of common compute framework

* Common implementation of CMMotionRemover

* CUDA implementation of common compute interface

* Converted HarmonicBondForce to common compute API

* Converted standard bonded forces to common compute API

* Converted ExpressionUtilities to common compute API

* Created ComputeParameterSet

* Converted custom bonded forces to common compute API

* Converted CustomCentroidBondForce to common compute API

* Converted CustomManyParticleForce to common compute API

* Moved lots of duplicate code from CudaContext and OpenCLContext to ComputeContext

* Converted GayBerneForce to common compute API

* Removed obsolete kernels

* Converted verlet integrators to common compute API

* Converted Langevin and Brownian integrators to common compute API

* Converted CustomIntegrator to common compute API

* Converted CustomNonbondedForce to common compute API

* Removed uses of a deprecated API

* Fixed failing test cases

* Converted GBSAOBCForce to common compute API

* Began converting CustomGBForce to common compute API

* Finished converting CustomGBForce to common compute API

* Merged duplicated code in CudaIntegrationUtilities and OpenCLIntegrationUtilities

* Converted RMSDForce and AndersenThermostat to common compute API

* Converted CustomHbondForce to common compute API

* Merged scripts for encoding kernel sources

* Converted Drude plugin to common compute API

* Fixed errors in CMake scripts

* Attempt at fixing errors on Windows

* Added discussion of common compute API to developer guide

* Added Windows export macro for common classes

* Fixed error in CMMotionRemover

* Updated travis to newer Ubuntu version

* Fixed errors on CPU OpenCL

* Fixed Windows linking errors

* Added missing pragma for 32 bit atomics

* Replaced long long with mm_long

* More fixes to Windows linking

* Bug fix
parent 38beeefe
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
namespace OpenMM { namespace OpenMM {
class OPENMM_EXPORT_OPENCL OpenCLCompact { class OPENMM_EXPORT_COMMON OpenCLCompact {
public: public:
OpenCLCompact(OpenCLContext& context); OpenCLCompact(OpenCLContext& context);
void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid); void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include <map> #include <map>
#include <queue>
#include <string> #include <string>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
...@@ -50,39 +49,23 @@ ...@@ -50,39 +49,23 @@
#endif #endif
#include <pthread.h> #include <pthread.h>
#include <cl.hpp> #include <cl.hpp>
#include "windowsExportOpenCL.h" #include "openmm/common/windowsExportCommon.h"
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLBondedUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLPlatform.h" #include "OpenCLPlatform.h"
#include "openmm/common/ComputeContext.h"
namespace OpenMM { namespace OpenMM {
class OpenCLForceInfo; class OpenCLForceInfo;
class OpenCLIntegrationUtilities;
class OpenCLExpressionUtilities;
class OpenCLBondedUtilities;
class OpenCLNonbondedUtilities;
class System;
/** /**
* We can't use predefined vector types like cl_float4, since different OpenCL implementations currently define * These are a few extra vector types beyond the ones in ComputeVectorTypes.h.
* them in incompatible ways. Hopefully that will be fixed in the future. In the mean time, we define our own
* types to represent them on the host.
*/ */
struct mm_float2 {
cl_float x, y;
mm_float2() {
}
mm_float2(cl_float x, cl_float y) : x(x), y(y) {
}
};
struct mm_float4 {
cl_float x, y, z, w;
mm_float4() {
}
mm_float4(cl_float x, cl_float y, cl_float z, cl_float w) : x(x), y(y), z(z), w(w) {
}
};
struct mm_float8 { struct mm_float8 {
cl_float s0, s1, s2, s3, s4, s5, s6, s7; cl_float s0, s1, s2, s3, s4, s5, s6, s7;
mm_float8() { mm_float8() {
...@@ -101,20 +84,6 @@ struct mm_float16 { ...@@ -101,20 +84,6 @@ struct mm_float16 {
s8(s8), s9(s9), s10(s10), s11(s11), s12(s12), s13(s13), s14(s14), s15(15) { s8(s8), s9(s9), s10(s10), s11(s11), s12(s12), s13(s13), s14(s14), s15(15) {
} }
}; };
struct mm_double2 {
cl_double x, y;
mm_double2() {
}
mm_double2(cl_double x, cl_double y) : x(x), y(y) {
}
};
struct mm_double4 {
cl_double x, y, z, w;
mm_double4() {
}
mm_double4(cl_double x, cl_double y, cl_double z, cl_double w) : x(x), y(y), z(z), w(w) {
}
};
struct mm_ushort2 { struct mm_ushort2 {
cl_ushort x, y; cl_ushort x, y;
mm_ushort2() { mm_ushort2() {
...@@ -122,20 +91,6 @@ struct mm_ushort2 { ...@@ -122,20 +91,6 @@ struct mm_ushort2 {
mm_ushort2(cl_ushort x, cl_ushort y) : x(x), y(y) { mm_ushort2(cl_ushort x, cl_ushort y) : x(x), y(y) {
} }
}; };
struct mm_int2 {
cl_int x, y;
mm_int2() {
}
mm_int2(cl_int x, cl_int y) : x(x), y(y) {
}
};
struct mm_int4 {
cl_int x, y, z, w;
mm_int4() {
}
mm_int4(cl_int x, cl_int y, cl_int z, cl_int w) : x(x), y(y), z(z), w(w) {
}
};
struct mm_int8 { struct mm_int8 {
cl_int s0, s1, s2, s3, s4, s5, s6, s7; cl_int s0, s1, s2, s3, s4, s5, s6, s7;
mm_int8() { mm_int8() {
...@@ -166,10 +121,9 @@ struct mm_int16 { ...@@ -166,10 +121,9 @@ struct mm_int16 {
* thread is not used and calculations are performed on the main application thread. * thread is not used and calculations are performed on the main application thread.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLContext { class OPENMM_EXPORT_COMMON OpenCLContext : public ComputeContext {
public: public:
class WorkTask; class WorkTask;
class WorkThread;
class ReorderListener; class ReorderListener;
class ForcePreComputation; class ForcePreComputation;
class ForcePostComputation; class ForcePostComputation;
...@@ -184,13 +138,14 @@ public: ...@@ -184,13 +138,14 @@ public:
*/ */
void initialize(); void initialize();
/** /**
* Add an OpenCLForceInfo to this context. * Add a ComputeForceInfo to this context.
*/ */
void addForce(OpenCLForceInfo* force); void addForce(ComputeForceInfo* force);
/** /**
* Get all OpenCLForceInfos that have been added to this context. * Request that the context provide at least a particular number of force buffers.
* Force kernels should call this during initialization.
*/ */
std::vector<OpenCLForceInfo*>& getForceInfos(); void requestForceBuffers(int minBuffers);
/** /**
* Get the cl::Context associated with this object. * Get the cl::Context associated with this object.
*/ */
...@@ -221,6 +176,14 @@ public: ...@@ -221,6 +176,14 @@ public:
OpenCLPlatform::PlatformData& getPlatformData() { OpenCLPlatform::PlatformData& getPlatformData() {
return platformData; return platformData;
} }
/**
* Get the number of contexts being used for the current simulation.
* This is relevant when a simulation is parallelized across multiple devices. In that case,
* one OpenCLContext is created for each device.
*/
int getNumContexts() const {
return platformData.contexts.size();
}
/** /**
* Get the index of this context in the list stored in the PlatformData. * Get the index of this context in the list stored in the PlatformData.
*/ */
...@@ -239,6 +202,28 @@ public: ...@@ -239,6 +202,28 @@ public:
* Reset the context to using the default queue for execution. * Reset the context to using the default queue for execution.
*/ */
void restoreDefaultQueue(); void restoreDefaultQueue();
/**
* Construct an uninitialized array of the appropriate class for this platform. The returned
* value should be created on the heap with the "new" operator.
*/
OpenCLArray* createArray();
/**
* Construct a ComputeEvent object of the appropriate class for this platform.
*/
ComputeEvent createEvent();
/**
* Compile source code to create a ComputeProgram.
*
* @param source the source code of the program
* @param defines a set of preprocessor definitions (name, value) to define when compiling the program
*/
ComputeProgram compileProgram(const std::string source, const std::map<std::string, std::string>& defines=std::map<std::string, std::string>());
/**
* Convert an array to an OpenCLArray. If the argument is already an OpenCLArray, this simply casts it.
* If the argument is a ComputeArray that wraps an OpenCLArray, this returns the wrapped array. For any
* other argument, this throws an exception.
*/
OpenCLArray& unwrap(ArrayInterface& array) const;
/** /**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom. * Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/ */
...@@ -295,10 +280,14 @@ public: ...@@ -295,10 +280,14 @@ public:
return pinnedMemory; return pinnedMemory;
} }
/** /**
* Get the host-side vector which contains the index of each atom. * Get a shared ThreadPool that code can use to parallelize operations.
*
* Because this object is freely available to all code, care is needed to avoid conflicts. Only use it
* from the main thread, and make sure all operations are complete before you invoke any other code that
* might make use of it.
*/ */
const std::vector<int>& getAtomIndex() const { ThreadPool& getThreadPool() {
return atomIndex; return getPlatformData().threads;
} }
/** /**
* Get the array which contains the index of each atom. * Get the array which contains the index of each atom.
...@@ -306,20 +295,6 @@ public: ...@@ -306,20 +295,6 @@ public:
OpenCLArray& getAtomIndexArray() { OpenCLArray& getAtomIndexArray() {
return atomIndexDevice; return atomIndexDevice;
} }
/**
* Get the number of cells by which the positions are offset.
*/
std::vector<mm_int4>& getPosCellOffsets() {
return posCellOffsets;
}
/**
* Replace all occurrences of a list of substrings.
*
* @param input a string to process
* @param replacements a set of strings that should be replaced with new strings wherever they appear in the input string
* @return a new string produced by performing the replacements
*/
std::string replaceStrings(const std::string& input, const std::map<std::string, std::string>& replacements) const;
/** /**
* Create an OpenCL Program from source code. * Create an OpenCL Program from source code.
* *
...@@ -348,7 +323,7 @@ public: ...@@ -348,7 +323,7 @@ public:
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
*/ */
void clearBuffer(OpenCLArray& array); void clearBuffer(ArrayInterface& array);
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
* *
...@@ -359,7 +334,7 @@ public: ...@@ -359,7 +334,7 @@ public:
/** /**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation. * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/ */
void addAutoclearBuffer(OpenCLArray& array); void addAutoclearBuffer(ArrayInterface& array);
/** /**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation. * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
* *
...@@ -447,19 +422,6 @@ public: ...@@ -447,19 +422,6 @@ public:
void setForcesValid(bool valid) { void setForcesValid(bool valid) {
forcesValid = valid; forcesValid = valid;
} }
/**
* Get the number of atoms.
*/
int getNumAtoms() const {
return numAtoms;
}
/**
* Get the number of atoms, rounded up to a multiple of TileSize. This is the actual size of
* most arrays with one element per atom.
*/
int getPaddedNumAtoms() const {
return paddedNumAtoms;
}
/** /**
* Get the number of blocks of TileSize atoms. * Get the number of blocks of TileSize atoms.
*/ */
...@@ -472,12 +434,25 @@ public: ...@@ -472,12 +434,25 @@ public:
int getNumThreadBlocks() const { int getNumThreadBlocks() const {
return numThreadBlocks; return numThreadBlocks;
} }
/**
* Get the maximum number of threads in a thread block supported by this device.
*/
int getMaxThreadBlockSize() const {
return device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
}
/** /**
* Get the number of force buffers. * Get the number of force buffers.
*/ */
int getNumForceBuffers() const { int getNumForceBuffers() const {
return numForceBuffers; return numForceBuffers;
} }
/**
* Get whether the device being used is a CPU. In some cases, different algorithms
* may be more efficient on CPUs and GPUs.
*/
bool getIsCPU() const {
return (device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
}
/** /**
* Get the SIMD width of the device being used. * Get the SIMD width of the device being used.
*/ */
...@@ -514,15 +489,6 @@ public: ...@@ -514,15 +489,6 @@ public:
bool getBoxIsTriclinic() const { bool getBoxIsTriclinic() const {
return boxIsTriclinic; return boxIsTriclinic;
} }
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std::string doubleToString(double value) const;
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std::string intToString(int value) const;
/** /**
* Get the vectors defining the periodic box. * Get the vectors defining the periodic box.
*/ */
...@@ -630,6 +596,11 @@ public: ...@@ -630,6 +596,11 @@ public:
OpenCLNonbondedUtilities& getNonbondedUtilities() { OpenCLNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded; return *nonbonded;
} }
/**
* This should be called by the Integrator from its own initialize() method.
* It ensures all contexts are fully initialized.
*/
void initializeContexts();
/** /**
* Set the particle charges. These are packed into the fourth element of the posq array. * Set the particle charges. These are packed into the fourth element of the posq array.
*/ */
...@@ -639,62 +610,6 @@ public: ...@@ -639,62 +610,6 @@ public:
* do that, this returns true the first time it is called, and false on all subsequent calls. * do that, this returns true the first time it is called, and false on all subsequent calls.
*/ */
bool requestPosqCharges(); bool requestPosqCharges();
/**
* Get the thread used by this context for executing parallel computations.
*/
WorkThread& getWorkThread() {
return *thread;
}
/**
* Get whether atoms were reordered during the most recent force/energy computation.
*/
bool getAtomsWereReordered() const {
return atomsWereReordered;
}
/**
* Set whether atoms were reordered during the most recent force/energy computation.
*/
void setAtomsWereReordered(bool wereReordered) {
atomsWereReordered = wereReordered;
}
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*/
void reorderAtoms();
/**
* Add a listener that should be called whenever atoms get reordered. The OpenCLContext
* assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void addReorderListener(ReorderListener* listener);
/**
* Get the list of ReorderListeners.
*/
std::vector<ReorderListener*>& getReorderListeners() {
return reorderListeners;
}
/**
* Add a pre-computation that should be called at the very start of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void addPreComputation(ForcePreComputation* computation);
/**
* Get the list of ForcePreComputations.
*/
std::vector<ForcePreComputation*>& getPreComputations() {
return preComputations;
}
/**
* Add a post-computation that should be called at the very end of force and energy evaluations.
* The OpenCLContext assumes ownership of the object, and deletes it when the context itself is deleted.
*/
void addPostComputation(ForcePostComputation* computation);
/**
* Get the list of ForcePostComputations.
*/
std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations;
}
/** /**
* Get the names of all parameters with respect to which energy derivatives are computed. * Get the names of all parameters with respect to which energy derivatives are computed.
*/ */
...@@ -717,49 +632,22 @@ public: ...@@ -717,49 +632,22 @@ public:
*/ */
void addEnergyParameterDerivative(const std::string& param); void addEnergyParameterDerivative(const std::string& param);
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Wait until all work that has been queued (kernel executions, asynchronous data transfers, etc.)
* This should be called whenever force field parameters change. It will cause the definitions * has been submitted to the device. This does not mean it has necessarily been completed.
* and order to be revalidated. * Calling this periodically may improve the responsiveness of the computer's GUI, but at the
*/ * expense of reduced simulation performance.
void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/ */
bool invalidateMolecules(OpenCLForceInfo* force); void flushQueue();
private: private:
struct Molecule;
struct MoleculeGroup;
class VirtualSiteInfo;
void findMoleculeGroups();
/**
* Ensure that all molecules marked as "identical" really are identical. This should be
* called whenever force field parameters change. If necessary, it will rebuild the list
* of molecules and resort the atoms.
*/
void validateMolecules();
/**
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template <class Real, class Real4, class Mixed, class Mixed4>
void reorderAtomsImpl();
const System& system;
double time;
OpenCLPlatform::PlatformData& platformData; OpenCLPlatform::PlatformData& platformData;
int deviceIndex; int deviceIndex;
int platformIndex; int platformIndex;
int contextIndex; int contextIndex;
int stepCount;
int computeForceCount;
int stepsSinceReorder;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid, hasAssignedPosqCharges; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, boxIsTriclinic, hasAssignedPosqCharges;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ; mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
...@@ -777,10 +665,6 @@ private: ...@@ -777,10 +665,6 @@ private:
cl::Kernel reduceForcesKernel; cl::Kernel reduceForcesKernel;
cl::Kernel reduceEnergyKernel; cl::Kernel reduceEnergyKernel;
cl::Kernel setChargesKernel; cl::Kernel setChargesKernel;
std::vector<OpenCLForceInfo*> forces;
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
std::vector<mm_int4> posCellOffsets;
cl::Buffer* pinnedBuffer; cl::Buffer* pinnedBuffer;
void* pinnedMemory; void* pinnedMemory;
OpenCLArray posq; OpenCLArray posq;
...@@ -796,118 +680,36 @@ private: ...@@ -796,118 +680,36 @@ private:
OpenCLArray chargeBuffer; OpenCLArray chargeBuffer;
std::vector<std::string> energyParamDerivNames; std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace; std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers; std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes; std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners;
std::vector<ForcePreComputation*> preComputations;
std::vector<ForcePostComputation*> postComputations;
OpenCLIntegrationUtilities* integration; OpenCLIntegrationUtilities* integration;
OpenCLExpressionUtilities* expression; OpenCLExpressionUtilities* expression;
OpenCLBondedUtilities* bonded; OpenCLBondedUtilities* bonded;
OpenCLNonbondedUtilities* nonbonded; OpenCLNonbondedUtilities* nonbonded;
WorkThread* thread;
};
struct OpenCLContext::Molecule {
std::vector<int> atoms;
std::vector<int> constraints;
std::vector<std::vector<int> > groups;
};
struct OpenCLContext::MoleculeGroup {
std::vector<int> atoms;
std::vector<int> instances;
std::vector<int> offsets;
}; };
/** /**
* This abstract class defines a task to be executed on the worker thread. * This class exists only for backward compatibility. Use ComputeContext::WorkTask instead.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLContext::WorkTask { class OPENMM_EXPORT_COMMON OpenCLContext::WorkTask : public ComputeContext::WorkTask {
public:
virtual void execute() = 0;
virtual ~WorkTask() {
}
};
class OPENMM_EXPORT_OPENCL OpenCLContext::WorkThread {
public:
struct ThreadData;
WorkThread();
~WorkThread();
/**
* Request that a task be executed on the worker thread. The argument should have been allocated on the
* heap with the "new" operator. After its execute() method finishes, the object will be deleted automatically.
*/
void addTask(OpenCLContext::WorkTask* task);
/**
* Get whether the worker thread is idle, waiting for a task to be added.
*/
bool isWaiting();
/**
* Get whether the worker thread has exited.
*/
bool isFinished();
/**
* Block until all tasks have finished executing and the worker thread is idle.
*/
void flush();
private:
std::queue<OpenCLContext::WorkTask*> tasks;
bool waiting, finished;
pthread_mutex_t queueLock;
pthread_cond_t waitForTaskCondition, queueEmptyCondition;
pthread_t thread;
}; };
/** /**
* This abstract class defines a function to be executed whenever atoms get reordered. * This class exists only for backward compatibility. Use ComputeContext::ReorderListener instead.
* Objects that need to know when reordering happens should create a ReorderListener
* and register it by calling addReorderListener().
*/ */
class OPENMM_EXPORT_OPENCL OpenCLContext::ReorderListener { class OPENMM_EXPORT_COMMON OpenCLContext::ReorderListener : public ComputeContext::ReorderListener {
public:
virtual void execute() = 0;
virtual ~ReorderListener() {
}
}; };
/** /**
* This abstract class defines a function to be executed at the very beginning of force and * This class exists only for backward compatibility. Use ComputeContext::ForcePreComputation instead.
* energy evaluation, before any other calculation has been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePreComputation, register it by calling addForcePreComputation().
*/ */
class OPENMM_EXPORT_OPENCL OpenCLContext::ForcePreComputation { class OPENMM_EXPORT_COMMON OpenCLContext::ForcePreComputation : public ComputeContext::ForcePreComputation {
public:
virtual ~ForcePreComputation() {
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
virtual void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
}; };
/** /**
* This abstract class defines a function to be executed at the very end of force and * This class exists only for backward compatibility. Use ComputeContext::ForcePostComputation instead.
* energy evaluation, after all other calculations have been done. It is useful for operations
* that need to be performed at a nonstandard point in the process. After creating a
* ForcePostComputation, register it by calling addForcePostComputation().
*/ */
class OPENMM_EXPORT_OPENCL OpenCLContext::ForcePostComputation { class OPENMM_EXPORT_COMMON OpenCLContext::ForcePostComputation : public ComputeContext::ForcePostComputation {
public:
virtual ~ForcePostComputation() {
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy.
*/
virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
}; };
} // namespace OpenMM } // namespace OpenMM
......
#ifndef OPENMM_OPENCLEVENT_H_
#define OPENMM_OPENCLEVENT_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "openmm/common/ComputeEvent.h"
namespace OpenMM {
/**
* This is the OpenCL implementation of the ComputeEventImpl interface.
* It pairs a reference to an OpenCLContext with a cl::Event.
*/
class OpenCLEvent : public ComputeEventImpl {
public:
/**
* Create an event for use with a particular context.
*
* @param context the OpenCLContext this event is associated with
*
* NOTE(review): the class holds an OpenCLContext& member, so the context
* presumably must outlive this object - confirm against the implementation.
*/
OpenCLEvent(OpenCLContext& context);
/**
* Place the event into the device's execution queue.
*/
void enqueue();
/**
* Block until all operations started before the call to enqueue() have completed.
*/
void wait();
private:
// Reference to an OpenCLContext (presumably the one passed to the constructor - confirm).
OpenCLContext& context;
// OpenCL event object; presumably recorded by enqueue() and waited on by wait() - confirm.
cl::Event event;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLEVENT_H_*/
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -27,104 +27,20 @@ ...@@ -27,104 +27,20 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLContext.h" #include "openmm/common/ExpressionUtilities.h"
#include "openmm/TabulatedFunction.h" #include "openmm/common/windowsExportCommon.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
#include <sstream>
#include <string>
#include <utility>
namespace OpenMM { namespace OpenMM {
/** /**
* This class is used by various classes to generate OpenCL source code implementing * This class exists only for backward compatibility. It adds no features beyond
* user defined mathematical expressions. * the base ExpressionUtilities class.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLExpressionUtilities { class OPENMM_EXPORT_COMMON OpenCLExpressionUtilities : public ExpressionUtilities {
public: public:
OpenCLExpressionUtilities(OpenCLContext& context); OpenCLExpressionUtilities(ComputeContext& context) : ExpressionUtilities(context) {
/** }
* Generate the source code for calculating a set of expressions.
*
* @param expressions the expressions to generate code for (keys are the variables to store the output values in)
* @param variables defines the source code to generate for each variable that may appear in the expressions. Keys are
* variable names, and the values are the code to generate for them.
* @param functions the tabulated functions that may appear in the expressions
* @param functionNames defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param tempType the type of value to use for temporary variables (defaults to "real")
*/
std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
const std::string& prefix, const std::string& tempType="real");
/**
* Generate the source code for calculating a set of expressions.
*
* @param expressions the expressions to generate code for (keys are the variables to store the output values in)
* @param variables defines the source code to generate for each variable or precomputed sub-expression that may appear in the expressions.
* Each entry is an ExpressionTreeNode, and the code to generate wherever an identical node appears.
* @param functions the tabulated functions that may appear in the expressions
* @param functionNames defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param tempType the type of value to use for temporary variables (defaults to "real")
*/
std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
const std::string& prefix, const std::string& tempType="real");
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
* @param function the function for which to compute coefficients
* @param width on output, the number of floats used for each value
* @return the spline coefficients
*/
std::vector<float> computeFunctionCoefficients(const TabulatedFunction& function, int& width);
/**
* Get a Lepton::CustomFunction that can be used to represent a TabulatedFunction when parsing expressions.
*
* @param function the function for which to get a placeholder
*/
Lepton::CustomFunction* getFunctionPlaceholder(const TabulatedFunction& function);
/**
* Get a Lepton::CustomFunction that can be used to represent the periodicdistance() function when parsing expressions.
*/
Lepton::CustomFunction* getPeriodicDistancePlaceholder();
private:
class FunctionPlaceholder : public Lepton::CustomFunction {
public:
FunctionPlaceholder(int numArgs) : numArgs(numArgs) {
}
int getNumArguments() const {
return numArgs;
}
double evaluate(const double* arguments) const {
return 0.0;
}
double evaluateDerivative(const double* arguments, const int* derivOrder) const {
return 0.0;
}
CustomFunction* clone() const {
return new FunctionPlaceholder(numArgs);
}
private:
int numArgs;
};
// Private helpers and state used while generating expression code.
// NOTE(review): only the declarations are visible here. The one-line descriptions below are inferred
// from names and signatures and should be confirmed against the implementation.

// Presumably writes to `out` the code that evaluates `node`, recording each generated temporary in `temps`.
void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
const std::string& prefix, const std::vector<std::vector<double> >& functionParams, const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
// Presumably looks up the temporary variable name already associated with `node` in `temps`.
std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
// Presumably collects into `nodes` the custom-function nodes under `searchNode` that are related to `node`.
void findRelatedCustomFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::vector<const Lepton::ExpressionTreeNode*>& nodes);
// Presumably collects into `powers` (keyed by integer exponent) the power nodes under `searchNode` that are related to `node`.
void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::map<int, const Lepton::ExpressionTreeNode*>& powers);
// Returns one vector of doubles per tabulated function in `functions` (contents not visible here).
std::vector<std::vector<double> > computeFunctionParameters(const std::vector<const TabulatedFunction*>& functions);
// The context this object works with; held by reference, so it must outlive this object.
OpenCLContext& context;
// Placeholder functions handed out by getFunctionPlaceholder() / getPeriodicDistancePlaceholder().
// NOTE(review): fp1, fp2 and fp3 presumably stand for 1-, 2- and 3-argument tabulated functions; confirm in the constructor.
FunctionPlaceholder fp1, fp2, fp3, periodicDistance;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -52,7 +52,7 @@ namespace OpenMM { ...@@ -52,7 +52,7 @@ namespace OpenMM {
* multiply every value of the original data set by the total number of data points. * multiply every value of the original data set by the total number of data points.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLFFT3D { class OPENMM_EXPORT_COMMON OpenCLFFT3D {
public: public:
/** /**
* Create an OpenCLFFT3D object for performing transforms of a particular size. * Create an OpenCLFFT3D object for performing transforms of a particular size.
......
...@@ -27,17 +27,19 @@ ...@@ -27,17 +27,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "windowsExportOpenCL.h" #include "openmm/common/ComputeForceInfo.h"
#include "openmm/common/windowsExportCommon.h"
#include <vector> #include <vector>
namespace OpenMM { namespace OpenMM {
/** /**
* This class is used by the OpenCL implementation of a Force class to convey information * This class exists primarily for backward compatibility. Beyond the features of
* about the behavior and requirements of that force. * ComputeForceInfo, it adds the ability to specify a required number of force buffers.
* Using this mechanism is equivalent to calling requestForceBuffers() on the OpenCLContext.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLForceInfo { class OPENMM_EXPORT_COMMON OpenCLForceInfo : public ComputeForceInfo {
public: public:
OpenCLForceInfo(int requiredForceBuffers) : requiredForceBuffers(requiredForceBuffers) { OpenCLForceInfo(int requiredForceBuffers) : requiredForceBuffers(requiredForceBuffers) {
} }
...@@ -47,22 +49,6 @@ public: ...@@ -47,22 +49,6 @@ public:
int getRequiredForceBuffers() { int getRequiredForceBuffers() {
return requiredForceBuffers; return requiredForceBuffers;
} }
/**
* Get whether or not two particles have identical force field parameters.
*/
virtual bool areParticlesIdentical(int particle1, int particle2);
/**
* Get the number of particle groups defined by this force.
*/
virtual int getNumParticleGroups();
/**
* Get the list of particles in a particular group.
*/
virtual void getParticlesInGroup(int index, std::vector<int>& particles);
/**
* Get whether two particle groups are identical.
*/
virtual bool areGroupsIdentical(int group1, int group2);
private: private:
int requiredForceBuffers; int requiredForceBuffers;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -27,153 +27,44 @@ ...@@ -27,153 +27,44 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "OpenCLContext.h" #include "openmm/common/IntegrationUtilities.h"
#include "windowsExportOpenCL.h" #include "openmm/common/windowsExportCommon.h"
#include <iosfwd>
#include <map>
namespace OpenMM { namespace OpenMM {
class OpenCLContext;
/** /**
* This class implements features that are used by many different integrators, including * This class implements features that are used by many different integrators, including
* common workspace arrays, random number generation, and enforcing constraints. * common workspace arrays, random number generation, and enforcing constraints.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLIntegrationUtilities { class OPENMM_EXPORT_COMMON OpenCLIntegrationUtilities : public IntegrationUtilities {
public: public:
OpenCLIntegrationUtilities(OpenCLContext& context, const System& system); OpenCLIntegrationUtilities(OpenCLContext& context, const System& system);
/** /**
* Get the array which contains position deltas. * Get the array which contains position deltas.
*/ */
OpenCLArray& getPosDelta() { OpenCLArray& getPosDelta();
return posDelta;
}
/** /**
* Get the array which contains random values. Each element is a float4, whose components * Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1. * are independent, normally distributed random numbers with mean 0 and variance 1.
*/ */
OpenCLArray& getRandom() { OpenCLArray& getRandom();
return random;
}
/** /**
* Get the array which contains the current step size. * Get the array which contains the current step size.
*/ */
OpenCLArray& getStepSize() { OpenCLArray& getStepSize();
return stepSize;
}
/**
* Set the size to use for the next step.
*/
void setNextStepSize(double size);
/**
* Get the size that was used for the last step.
*/
double getLastStepSize();
/**
* Apply constraints to the atom positions.
*
* @param tol the constraint tolerance
*/
void applyConstraints(double tol);
/**
* Apply constraints to the atom velocities.
*
* @param tol the constraint tolerance
*/
void applyVelocityConstraints(double tol);
/**
* Initialize the random number generator.
*/
void initRandomNumberGenerator(unsigned int randomNumberSeed);
/**
* Ensure that sufficient random numbers are available in the array, and generate new ones if not.
*
* @param numValues the number of random float4's that will be required
* @return the index in the array at which to start reading
*/
int prepareRandomNumbers(int numValues);
/**
* Compute the positions of virtual sites.
*/
void computeVirtualSites();
/** /**
* Distribute forces from virtual sites to the atoms they are based on. * Distribute forces from virtual sites to the atoms they are based on.
*/ */
void distributeForcesFromVirtualSites(); void distributeForcesFromVirtualSites();
/**
* Create a checkpoint recording the current state of the random number generator.
*
* @param stream an output stream the checkpoint data should be written to
*/
void createCheckpoint(std::ostream& stream);
/**
* Load a checkpoint that was written by createCheckpoint().
*
* @param stream an input stream the checkpoint data should be read from
*/
void loadCheckpoint(std::istream& stream);
/**
* Compute the kinetic energy of the system, possibly shifting the velocities in time to account
* for a leapfrog integrator.
*
* @param timeShift the amount by which to shift the velocities in time
*/
double computeKineticEnergy(double timeShift);
/**
* Get the data structure that holds the state of all Nose-Hoover chains
*
* @return vector of chain states
*/
std::map<int, OpenCLArray>& getNoseHooverChainState();
private: private:
void applyConstraints(bool constrainVelocities, double tol); void applyConstraintsImpl(bool constrainVelocities, double tol);
OpenCLContext& context;
cl::Kernel settlePosKernel, settleVelKernel;
cl::Kernel shakePosKernel, shakeVelKernel;
cl::Kernel ccmaDirectionsKernel;
cl::Kernel ccmaPosForceKernel, ccmaVelForceKernel;
cl::Kernel ccmaMultiplyKernel;
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel, vsiteAddForcesKernel;
cl::Kernel randomKernel, timeShiftKernel;
OpenCLArray posDelta;
OpenCLArray settleAtoms;
OpenCLArray settleParams;
OpenCLArray shakeAtoms;
OpenCLArray shakeParams;
OpenCLArray random;
OpenCLArray randomSeed;
OpenCLArray stepSize;
OpenCLArray ccmaAtoms;
OpenCLArray ccmaDistance;
OpenCLArray ccmaReducedMass;
OpenCLArray ccmaAtomConstraints;
OpenCLArray ccmaNumAtomConstraints;
OpenCLArray ccmaConstraintMatrixColumn;
OpenCLArray ccmaConstraintMatrixValue;
OpenCLArray ccmaDelta1;
OpenCLArray ccmaDelta2;
OpenCLArray ccmaConverged;
OpenCLArray ccmaConvergedHostBuffer; OpenCLArray ccmaConvergedHostBuffer;
OpenCLArray vsite2AvgAtoms; bool ccmaUseDirectBuffer;
OpenCLArray vsite2AvgWeights;
OpenCLArray vsite3AvgAtoms;
OpenCLArray vsite3AvgWeights;
OpenCLArray vsiteOutOfPlaneAtoms;
OpenCLArray vsiteOutOfPlaneWeights;
OpenCLArray vsiteLocalCoordsIndex;
OpenCLArray vsiteLocalCoordsAtoms;
OpenCLArray vsiteLocalCoordsWeights;
OpenCLArray vsiteLocalCoordsPos;
OpenCLArray vsiteLocalCoordsStartIndex;
int randomPos;
int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
mm_double2 lastStepSize;
struct ShakeCluster;
struct ConstraintOrderer;
std::map<int, OpenCLArray> noseHooverChainState;
}; };
} // namespace OpenMM } // namespace OpenMM
......
#ifndef OPENMM_OPENCLKERNEL_H_
#define OPENMM_OPENCLKERNEL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include <cl.hpp>
#include <string>
#include <vector>
namespace OpenMM {
/**
* This is the OpenCL implementation of the ComputeKernelImpl interface.
* It wraps a cl::Kernel together with the OpenCLContext it belongs to.
*/
class OpenCLKernel : public ComputeKernelImpl {
public:
/**
* Create a new OpenCLKernel.
*
* @param context the context this kernel belongs to
* @param kernel the kernel to be invoked
*/
OpenCLKernel(OpenCLContext& context, cl::Kernel kernel);
/**
* Get the name of this kernel.
*/
std::string getName() const;
/**
* Execute this kernel.
*
* @param threads the maximum number of threads that should be used. Depending on the
* computing device, it may choose to use fewer threads than this number.
* @param blockSize the number of threads in each thread block. If this is omitted, a
* default size that is appropriate for the computing device is used.
*/
void execute(int threads, int blockSize=-1);
protected:
/**
* Add an argument to pass to the kernel when it is invoked, where the value is a
* subclass of ArrayInterface.
*
* @param value the value to pass to the kernel
*/
void addArrayArg(ArrayInterface& value);
/**
* Add an argument to pass to the kernel when it is invoked, where the value is a primitive type.
*
* @param value a pointer to the argument value
* @param size the size of the value in bytes
*/
void addPrimitiveArg(const void* value, int size);
/**
* Add a placeholder for an argument without specifying its value.
*/
void addEmptyArg();
/**
* Set the value of an existing argument, where the value is a
* subclass of ArrayInterface.
*
* @param index the index of the argument to set
* @param value the value to pass to the kernel
*/
void setArrayArg(int index, ArrayInterface& value);
/**
* Set the value of an existing argument, where the value is a primitive type.
*
* @param index the index of the argument to set
* @param value a pointer to the argument value
* @param size the size of the value in bytes
*/
void setPrimitiveArg(int index, const void* value, int size);
private:
// The context this kernel belongs to; held by reference, so it must outlive this object.
OpenCLContext& context;
// The underlying OpenCL kernel to be invoked.
cl::Kernel kernel;
// NOTE(review): presumably the array bound to each argument position - confirm against the implementation.
std::vector<OpenCLArray*> arrayArgs;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLKERNEL_H_*/
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
namespace OpenMM { namespace OpenMM {
/** /**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the * This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any * Platform a chance to clear buffers and do other initialization at the beginning, and to do any
...@@ -238,349 +237,6 @@ private: ...@@ -238,349 +237,6 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
}; };
/**
 * This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcHarmonicBondForceKernel
     * @param platform  the Platform, forwarded to CalcHarmonicBondForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicBondForceKernel(name, platform),
            numBonds(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the HarmonicBondForce this kernel will be used for
     */
    void initialize(const System& system, const HarmonicBondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the HarmonicBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numBonds;               // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLArray params;
};
/**
 * This kernel is invoked by CustomBondForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcCustomBondForceKernel
     * @param platform  the Platform, forwarded to CalcCustomBondForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomBondForceKernel(name, platform),
            numBonds(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system), params(NULL) {
    }
    ~OpenCLCalcCustomBondForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomBondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomBondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numBonds;               // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLParameterSet* params; // NULL after construction; presumably released by the destructor - confirm
    OpenCLArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
};
/**
 * This kernel is invoked by HarmonicAngleForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcHarmonicAngleForceKernel
     * @param platform  the Platform, forwarded to CalcHarmonicAngleForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcHarmonicAngleForceKernel(name, platform),
            numAngles(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the HarmonicAngleForce this kernel will be used for
     */
    void initialize(const System& system, const HarmonicAngleForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the HarmonicAngleForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numAngles;              // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLArray params;
};
/**
 * This kernel is invoked by CustomAngleForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcCustomAngleForceKernel
     * @param platform  the Platform, forwarded to CalcCustomAngleForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomAngleForceKernel(name, platform),
            numAngles(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system), params(NULL) {
    }
    ~OpenCLCalcCustomAngleForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomAngleForce this kernel will be used for
     */
    void initialize(const System& system, const CustomAngleForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomAngleForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numAngles;              // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLParameterSet* params; // NULL after construction; presumably released by the destructor - confirm
    OpenCLArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
};
/**
 * This kernel is invoked by PeriodicTorsionForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcPeriodicTorsionForceKernel
     * @param platform  the Platform, forwarded to CalcPeriodicTorsionForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcPeriodicTorsionForceKernel(name, platform),
            numTorsions(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the PeriodicTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const PeriodicTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the PeriodicTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numTorsions;            // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLArray params;
};
/**
 * This kernel is invoked by RBTorsionForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcRBTorsionForceKernel
     * @param platform  the Platform, forwarded to CalcRBTorsionForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcRBTorsionForceKernel(name, platform),
            numTorsions(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the RBTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const RBTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the RBTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numTorsions;            // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLArray params;
};
/**
 * This kernel is invoked by CMAPTorsionForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcCMAPTorsionForceKernel
     * @param platform  the Platform, forwarded to CalcCMAPTorsionForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCMAPTorsionForceKernel(name, platform),
            numTorsions(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CMAPTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const CMAPTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CMAPTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numTorsions;            // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    std::vector<mm_int2> mapPositionsVec;
    OpenCLArray coefficients;
    OpenCLArray mapPositions;
    OpenCLArray torsionMaps;
};
/**
 * This kernel is invoked by CustomTorsionForce to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state.
     *
     * @param name      the kernel name, forwarded to CalcCustomTorsionForceKernel
     * @param platform  the Platform, forwarded to CalcCustomTorsionForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     * @param system    the System; held by reference, so it must outlive this object
     */
    OpenCLCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomTorsionForceKernel(name, platform),
            numTorsions(0), hasInitializedKernel(false), cl(cl), info(NULL), system(system), params(NULL) {
    }
    ~OpenCLCalcCustomTorsionForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const CustomTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    int numTorsions;            // zero until set elsewhere (presumably by initialize() - confirm)
    bool hasInitializedKernel;  // false after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    const System& system;
    OpenCLParameterSet* params; // NULL after construction; presumably released by the destructor - confirm
    OpenCLArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
};
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/ */
...@@ -718,636 +374,60 @@ private: ...@@ -718,636 +374,60 @@ private:
}; };
/** /**
* This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system. * This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/ */
class OpenCLCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public: public:
OpenCLCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomNonbondedForceKernel(name, platform), OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
cl(cl), params(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) { cl(cl), hasInitializedKernels(false) {
} }
~OpenCLCalcCustomNonbondedForceKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
* @param system the System this kernel will be applied to * @param system the System this kernel will be applied to
* @param force the CustomNonbondedForce this kernel will be used for * @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/ */
void initialize(const System& system, const CustomNonbondedForce& force); void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/** /**
* Execute the kernel to calculate the forces and/or energy. * Execute the kernel to calculate the forces and/or energy.
* *
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated * @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated * @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force * @return the potential energy due to the force
*/ */
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void copyState(ContextImpl& context, ContextImpl& innerContext);
/** /**
* Copy changed parameters over to a context. * Copy changed parameters over to a context.
* *
* @param context the context to copy parameters to * @param context the context to copy parameters to
* @param force the CustomNonbondedForce to copy the parameters from * @param force the CustomCVForce to copy the parameters from
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomCVForce& force);
private: private:
class ForceInfo; class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes); class ReorderListener;
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params;
OpenCLArray globals;
OpenCLArray interactionGroupData, filteredGroupData, numGroupTiles;
cl::Kernel interactionGroupKernel, prepareNeighborListKernel, buildNeighborListKernel;
std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray> tabulatedFunctions;
double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs, useNeighborList;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy;
const System& system;
};
/**
 * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
 */
class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public:
    /**
     * Create the kernel object. This only records its arguments and puts the scalar
     * and pointer members into a known state. The initializers are listed in member
     * declaration order, which is the order in which C++ actually runs them.
     *
     * @param name      the kernel name, forwarded to CalcGBSAOBCForceKernel
     * @param platform  the Platform, forwarded to CalcGBSAOBCForceKernel
     * @param cl        the OpenCLContext to use; held by reference, so it must outlive this object
     */
    OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform),
            prefactor(0.0), surfaceAreaFactor(0.0), cutoff(0.0), hasCreatedKernels(false), maxTiles(0), cl(cl), info(NULL) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the GBSAOBCForce this kernel will be used for
     */
    void initialize(const System& system, const GBSAOBCForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the GBSAOBCForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
    class ForceInfo;            // forward declaration only; not defined in this header section
    // Zero until set elsewhere (presumably by initialize() - confirm).
    double prefactor, surfaceAreaFactor, cutoff;
    bool hasCreatedKernels;     // false after construction
    int maxTiles;               // zero after construction
    OpenCLContext& cl;
    ForceInfo* info;            // NULL after construction; ownership is not visible from this header
    OpenCLArray params;
    OpenCLArray charges;
    OpenCLArray bornSum;
    OpenCLArray longBornSum;
    OpenCLArray bornRadii;
    OpenCLArray bornForce;
    OpenCLArray longBornForce;
    OpenCLArray obcChain;
    cl::Kernel computeBornSumKernel;
    cl::Kernel reduceBornSumKernel;
    cl::Kernel force1Kernel;
    cl::Kernel reduceBornForceKernel;
};
/**
 * OpenCL implementation of CalcCustomGBForceKernel.  CustomGBForce invokes it to
 * calculate the forces acting on the system.
 */
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
    /**
     * Construct the kernel.  The kernels are flagged as uninitialized and every
     * parameter-set pointer starts out NULL.
     */
    OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomGBForceKernel(name, platform),
              hasInitializedKernels(false),
              cl(cl),
              params(NULL),
              computedValues(NULL),
              energyDerivs(NULL),
              energyDerivChain(NULL),
              system(system) {
    }
    ~OpenCLCalcCustomGBForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomGBForce this kernel will be used for
     */
    void initialize(const System& system, const CustomGBForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomGBForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
    class ForceInfo;
    double cutoff;
    bool hasInitializedKernels;
    bool needParameterGradient;
    bool needEnergyParamDerivs;
    int maxTiles;
    int numComputedValues;
    OpenCLContext& cl;
    ForceInfo* info;
    OpenCLParameterSet* params;
    OpenCLParameterSet* computedValues;
    OpenCLParameterSet* energyDerivs;
    OpenCLParameterSet* energyDerivChain;
    std::vector<OpenCLParameterSet*> dValuedParam;
    std::vector<OpenCLArray> dValue0dParam;
    OpenCLArray longEnergyDerivs, globals, valueBuffers, longValueBuffers;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
    std::vector<OpenCLArray> tabulatedFunctions;
    std::vector<bool> pairValueUsesParam;
    std::vector<bool> pairEnergyUsesParam;
    std::vector<bool> pairEnergyUsesValue;
    const System& system;
    cl::Kernel pairValueKernel;
    cl::Kernel perParticleValueKernel;
    cl::Kernel pairEnergyKernel;
    cl::Kernel perParticleEnergyKernel;
    cl::Kernel gradientChainRuleKernel;
    std::string pairValueSrc;
    std::string pairEnergySrc;
    std::map<std::string, std::string> pairValueDefines;
    std::map<std::string, std::string> pairEnergyDefines;
};
/**
 * OpenCL implementation of CalcCustomExternalForceKernel.  CustomExternalForce invokes
 * it to calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public:
    /**
     * Construct the kernel.  The kernel is flagged as uninitialized and the
     * parameter set pointer starts out NULL.
     */
    OpenCLCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomExternalForceKernel(name, platform),
              hasInitializedKernel(false),
              cl(cl),
              system(system),
              params(NULL) {
    }
    ~OpenCLCalcCustomExternalForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomExternalForce this kernel will be used for
     */
    void initialize(const System& system, const CustomExternalForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomExternalForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
    class ForceInfo;
    int numParticles;
    bool hasInitializedKernel;
    OpenCLContext& cl;
    ForceInfo* info;
    const System& system;
    OpenCLParameterSet* params;
    OpenCLArray globals;
    // Names and current values of the global parameters (kept as parallel vectors).
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
};
/**
 * OpenCL implementation of CalcCustomHbondForceKernel.  CustomHbondForce invokes it to
 * calculate the forces acting on the system.
 */
class OpenCLCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public:
    /**
     * Construct the kernel.  The kernel is flagged as uninitialized and both
     * parameter-set pointers start out NULL.
     */
    OpenCLCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomHbondForceKernel(name, platform),
              hasInitializedKernel(false),
              cl(cl),
              donorParams(NULL),
              acceptorParams(NULL),
              system(system) {
    }
    ~OpenCLCalcCustomHbondForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomHbondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomHbondForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomHbondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
    class ForceInfo;
    int numDonors;
    int numAcceptors;
    bool hasInitializedKernel;
    OpenCLContext& cl;
    ForceInfo* info;
    OpenCLParameterSet* donorParams;
    OpenCLParameterSet* acceptorParams;
    OpenCLArray globals, donors, acceptors, donorBufferIndices, acceptorBufferIndices, donorExclusions, acceptorExclusions;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
    std::vector<OpenCLArray> tabulatedFunctions;
    const System& system;
    cl::Kernel donorKernel;
    cl::Kernel acceptorKernel;
};
/**
 * OpenCL implementation of CalcCustomCentroidBondForceKernel.  CustomCentroidBondForce
 * invokes it to calculate the forces acting on the system.
 */
class OpenCLCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
public:
    /**
     * Construct the kernel; the parameter set pointer starts out NULL.
     */
    OpenCLCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomCentroidBondForceKernel(name, platform),
              cl(cl),
              params(NULL),
              system(system) {
    }
    ~OpenCLCalcCustomCentroidBondForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomCentroidBondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomCentroidBondForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomCentroidBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private:
    class ForceInfo;
    int numGroups;
    int numBonds;
    bool needEnergyParamDerivs;
    OpenCLContext& cl;
    ForceInfo* info;
    OpenCLParameterSet* params;
    OpenCLArray globals, groupParticles, groupWeights, groupOffsets, groupForces, bondGroups, centerPositions;
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
    std::vector<OpenCLArray> tabulatedFunctions;
    cl::Kernel computeCentersKernel;
    cl::Kernel groupForcesKernel;
    cl::Kernel applyForcesKernel;
    const System& system;
};
/**
 * OpenCL implementation of CalcCustomCompoundBondForceKernel.  CustomCompoundBondForce
 * invokes it to calculate the forces acting on the system.
 */
class OpenCLCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public:
    /**
     * Construct the kernel; the parameter set pointer starts out NULL.
     */
    OpenCLCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomCompoundBondForceKernel(name, platform),
              cl(cl),
              params(NULL),
              system(system) {
    }
    ~OpenCLCalcCustomCompoundBondForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomCompoundBondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomCompoundBondForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomCompoundBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
    class ForceInfo;
    int numBonds;
    OpenCLContext& cl;
    ForceInfo* info;
    OpenCLParameterSet* params;
    OpenCLArray globals;
    // Names and current values of the global parameters (kept as parallel vectors).
    std::vector<std::string> globalParamNames;
    std::vector<cl_float> globalParamValues;
    std::vector<OpenCLArray> tabulatedFunctions;
    const System& system;
};
/**
 * OpenCL implementation of CalcCustomManyParticleForceKernel.  CustomManyParticleForce
 * invokes it to calculate the forces acting on the system.
 */
class OpenCLCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
public:
    /**
     * Construct the kernel.  The kernel is flagged as uninitialized and the
     * parameter set pointer starts out NULL.  Initializers are listed in member
     * declaration order, which is the order in which they run.
     */
    OpenCLCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system)
            : CalcCustomManyParticleForceKernel(name, platform),
              cl(cl),
              hasInitializedKernel(false),
              params(NULL),
              system(system) {
    }
    ~OpenCLCalcCustomManyParticleForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomManyParticleForce this kernel will be used for
     */
    void initialize(const System& system, const CustomManyParticleForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomManyParticleForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private:
    class ForceInfo;
    OpenCLContext& cl;
    ForceInfo* info;
    bool hasInitializedKernel;
    NonbondedMethod nonbondedMethod;
    int maxNeighborPairs;
    int forceWorkgroupSize;
    int findNeighborsWorkgroupSize;
    OpenCLParameterSet* params;
    OpenCLArray globals, particleTypes, orderIndex, particleOrder;
    OpenCLArray exclusions, exclusionStartIndex;
    OpenCLArray blockCenter, blockBoundingBox;
    OpenCLArray neighborPairs, numNeighborPairs, neighborStartIndex, numNeighborsForAtom, neighbors;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
    std::vector<OpenCLArray> tabulatedFunctions;
    const System& system;
    cl::Kernel forceKernel;
    cl::Kernel blockBoundsKernel;
    cl::Kernel neighborsKernel;
    cl::Kernel startIndicesKernel;
    cl::Kernel copyPairsKernel;
};
/**
 * OpenCL implementation of CalcGayBerneForceKernel.  GayBerneForce invokes it to
 * calculate the forces acting on the system.
 */
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
    /**
     * Construct the kernel; the OpenCL kernels are flagged as not yet initialized.
     */
    OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl)
            : CalcGayBerneForceKernel(name, platform),
              cl(cl),
              hasInitializedKernels(false) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the GayBerneForce this kernel will be used for
     */
    void initialize(const System& system, const GayBerneForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the GayBerneForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private:
    class ForceInfo;
    class ReorderListener;
    void sortAtoms();
    OpenCLContext& cl;
    ForceInfo* info;
    bool hasInitializedKernels;
    int numRealParticles;
    int maxNeighborBlocks;
    GayBerneForce::NonbondedMethod nonbondedMethod;
    OpenCLArray sortedParticles, axisParticleIndices;
    OpenCLArray sigParams, epsParams, scale;
    OpenCLArray exceptionParticles, exceptionParams;
    OpenCLArray aMatrix, bMatrix, gMatrix;
    OpenCLArray exclusions, exclusionStartIndex;
    OpenCLArray blockCenter, blockBoundingBox;
    OpenCLArray neighbors, neighborIndex, neighborBlockCount;
    OpenCLArray sortedPos, torque;
    std::vector<bool> isRealParticle;
    std::vector<std::pair<int, int> > exceptionAtoms, excludedPairs;
    cl::Kernel framesKernel;
    cl::Kernel blockBoundsKernel;
    cl::Kernel neighborsKernel;
    cl::Kernel forceKernel;
    cl::Kernel torqueKernel;
};
/**
 * OpenCL implementation of CalcCustomCVForceKernel.  CustomCVForce invokes it to
 * calculate the forces acting on the system and the energy of the system.
 */
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
    /**
     * Construct the kernel; the OpenCL kernels are flagged as not yet initialized.
     */
    OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl)
            : CalcCustomCVForceKernel(name, platform),
              cl(cl),
              hasInitializedKernels(false) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system         the System this kernel will be applied to
     * @param force          the CustomCVForce this kernel will be used for
     * @param innerContext   the context created by the CustomCVForce for computing collective variables
     */
    void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param innerContext   the context created by the CustomCVForce for computing collective variables
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
    /**
     * Transfer state information into the inner context.
     *
     * @param context        the context in which to execute this kernel
     * @param innerContext   the context created by the CustomCVForce for computing collective variables
     */
    void copyState(ContextImpl& context, ContextImpl& innerContext);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomCVForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomCVForce& force);
private:
    class ForceInfo;
    class ReorderListener;
    OpenCLContext& cl;
    bool hasInitializedKernels;
    Lepton::ExpressionProgram energyExpression;
    std::vector<std::string> variableNames;
    std::vector<std::string> paramDerivNames;
    std::vector<std::string> globalParameterNames;
    std::vector<Lepton::ExpressionProgram> variableDerivExpressions, paramDerivExpressions;
    std::vector<OpenCLArray> cvForces;
    OpenCLArray invAtomOrder, innerInvAtomOrder;
    cl::Kernel copyStateKernel;
    cl::Kernel copyForcesKernel;
    cl::Kernel addForcesKernel;
};
/**
 * OpenCL implementation of CalcRMSDForceKernel.  RMSDForce invokes it to calculate
 * the forces acting on the system and the energy of the system.
 */
class OpenCLCalcRMSDForceKernel : public CalcRMSDForceKernel {
public:
    OpenCLCalcRMSDForceKernel(std::string name, const Platform& platform, OpenCLContext& cl)
            : CalcRMSDForceKernel(name, platform),
              cl(cl) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the RMSDForce this kernel will be used for
     */
    void initialize(const System& system, const RMSDForce& force);
    /**
     * Record the reference positions and particle indices.
     */
    void recordParameters(const RMSDForce& force);
    /**
     * Run the kernel, computing forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Internal implementation of execute().  It is a template so that it can be
     * instantiated for either single or double precision.
     */
    template <class REAL>
    double executeImpl(ContextImpl& context);
    /**
     * Push parameters that have changed in the force over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the RMSDForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const RMSDForce& force);
private:
    class ForceInfo;
    OpenCLContext& cl;
    ForceInfo* info;
    double sumNormRef;
    OpenCLArray referencePos, particles, buffer;
    cl::Kernel kernel1;
    cl::Kernel kernel2;
};
/**
 * This kernel is invoked by VerletIntegrator to take one time step.
 */
class OpenCLIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
public:
    /**
     * Construct the kernel; the OpenCL kernels are flagged as not yet initialized.
     */
    OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl),
            hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateVerletStepKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the VerletIntegrator this kernel will be used for
     */
    void initialize(const System& system, const VerletIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VerletIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const VerletIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VerletIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator);
private:
    // Each member is declared exactly once; a class may not redeclare a data member.
    OpenCLContext& cl;
    bool hasInitializedKernels;
    cl::Kernel kernel1, kernel2;
};
/* /*
* This kernel is invoked by NoseHooverIntegrator to take one time step. * This kernel is invoked by NoseHooverIntegrator to take one time step.
*/ */
...@@ -1386,368 +466,6 @@ private: ...@@ -1386,368 +466,6 @@ private:
cl::Kernel kernel1, kernel2, kernel3, kernelHardWall; cl::Kernel kernel1, kernel2, kernel3, kernelHardWall;
}; };
/**
 * This kernel is invoked by LangevinIntegrator to take one time step.
 */
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public:
    /**
     * Construct the kernel.  The kernels are flagged as uninitialized, and the cached
     * temperature, friction and step size start at -1 (the same sentinel that
     * OpenCLIntegrateBrownianStepKernel uses) so they never hold indeterminate values.
     */
    OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
            prevTemp(-1), prevFriction(-1), prevStepSize(-1), hasInitializedKernels(false) {
    }
    /**
     * Initialize the kernel, setting up the particle masses.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the LangevinIntegrator this kernel will be used for
     */
    void initialize(const System& system, const LangevinIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the LangevinIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const LangevinIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the LangevinIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const LangevinIntegrator& integrator);
private:
    OpenCLContext& cl;
    // Cached integrator settings; presumably compared against the integrator's current
    // values to detect changes -- confirm against the implementation file.
    double prevTemp, prevFriction, prevStepSize;
    bool hasInitializedKernels;
    OpenCLArray params;
    cl::Kernel kernel1, kernel2;
};
/**
 * This kernel is invoked by BAOABLangevinIntegrator to take one time step.
 */
class OpenCLIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
public:
    /**
     * Construct the kernel.  The kernels are flagged as uninitialized, and the cached
     * temperature, friction and step size start at -1 (the same sentinel that
     * OpenCLIntegrateBrownianStepKernel uses) so they never hold indeterminate values.
     */
    OpenCLIntegrateBAOABStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateBAOABStepKernel(name, platform), cl(cl),
            prevTemp(-1), prevFriction(-1), prevStepSize(-1), hasInitializedKernels(false) {
    }
    /**
     * Initialize the kernel, setting up the particle masses.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the BAOABLangevinIntegrator this kernel will be used for
     */
    void initialize(const System& system, const BAOABLangevinIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context        the context in which to execute this kernel
     * @param integrator     the BAOABLangevinIntegrator this kernel is being used for
     * @param forcesAreValid if the context has been modified since the last time step, this will be
     *                       false to show that cached forces are invalid and must be recalculated.
     *                       On exit, this should specify whether the cached forces are valid at the
     *                       end of the step.
     */
    void execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the BAOABLangevinIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator);
private:
    OpenCLContext& cl;
    // Cached integrator settings; presumably compared against the integrator's current
    // values to detect changes -- confirm against the implementation file.
    double prevTemp, prevFriction, prevStepSize;
    bool hasInitializedKernels;
    OpenCLArray params, oldDelta;
    cl::Kernel kernel1, kernel2, kernel3, kernel4;
};
/**
 * OpenCL implementation of IntegrateBrownianStepKernel.  BrownianIntegrator invokes it
 * to take one time step.
 */
class OpenCLIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
public:
    /**
     * Construct the kernel.  The cached temperature, friction and step size start
     * at -1 and the kernels are flagged as uninitialized.  Initializers are listed
     * in member declaration order, which is the order in which they run.
     */
    OpenCLIntegrateBrownianStepKernel(std::string name, const Platform& platform, OpenCLContext& cl)
            : IntegrateBrownianStepKernel(name, platform),
              cl(cl),
              prevTemp(-1),
              prevFriction(-1),
              prevStepSize(-1),
              hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateBrownianStepKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the BrownianIntegrator this kernel will be used for
     */
    void initialize(const System& system, const BrownianIntegrator& integrator);
    /**
     * Run the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the BrownianIntegrator this kernel is being used for
     */
    void execute(ContextImpl& context, const BrownianIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the BrownianIntegrator this kernel is being used for
     */
    double computeKineticEnergy(ContextImpl& context, const BrownianIntegrator& integrator);
private:
    OpenCLContext& cl;
    double prevTemp;
    double prevFriction;
    double prevStepSize;
    bool hasInitializedKernels;
    cl::Kernel kernel1;
    cl::Kernel kernel2;
};
/**
 * OpenCL implementation of IntegrateVariableVerletStepKernel.  VariableVerletIntegrator
 * invokes it to take one time step.
 */
class OpenCLIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
public:
    /**
     * Construct the kernel; the OpenCL kernels are flagged as not yet initialized.
     */
    OpenCLIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl)
            : IntegrateVariableVerletStepKernel(name, platform),
              cl(cl),
              hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateVariableVerletStepKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the VariableVerletIntegrator this kernel will be used for
     */
    void initialize(const System& system, const VariableVerletIntegrator& integrator);
    /**
     * Run the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VariableVerletIntegrator this kernel is being used for
     * @param maxTime    the maximum time beyond which the simulation should not be advanced
     * @return the size of the step that was taken
     */
    double execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VariableVerletIntegrator this kernel is being used for
     */
    double computeKineticEnergy(ContextImpl& context, const VariableVerletIntegrator& integrator);
private:
    OpenCLContext& cl;
    bool hasInitializedKernels;
    int blockSize;
    cl::Kernel kernel1;
    cl::Kernel kernel2;
    cl::Kernel selectSizeKernel;
};
/**
 * This kernel is invoked by VariableLangevinIntegrator to take one time step.
 */
class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
public:
    /**
     * Construct the kernel.  The kernels are flagged as uninitialized, and the cached
     * temperature, friction and error tolerance start at -1 (the same sentinel that
     * OpenCLIntegrateBrownianStepKernel uses) so they never hold indeterminate values.
     */
    OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
            hasInitializedKernels(false), prevTemp(-1), prevFriction(-1), prevErrorTol(-1) {
    }
    /**
     * Initialize the kernel, setting up the particle masses.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the VariableLangevinIntegrator this kernel will be used for
     */
    void initialize(const System& system, const VariableLangevinIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VariableLangevinIntegrator this kernel is being used for
     * @param maxTime    the maximum time beyond which the simulation should not be advanced
     * @return the size of the step that was taken
     */
    double execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
    /**
     * Compute the kinetic energy.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the VariableLangevinIntegrator this kernel is being used for
     * @return the kinetic energy
     */
    double computeKineticEnergy(ContextImpl& context, const VariableLangevinIntegrator& integrator);
private:
    OpenCLContext& cl;
    bool hasInitializedKernels;
    int blockSize;
    OpenCLArray params;
    cl::Kernel kernel1, kernel2, selectSizeKernel;
    // Cached integrator settings; presumably compared against the integrator's current
    // values to detect changes -- confirm against the implementation file.
    double prevTemp, prevFriction, prevErrorTol;
};
/**
 * This kernel is invoked by CustomIntegrator to take one time step.
 * It is the OpenCL implementation of IntegrateCustomStepKernel.
 */
class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
/**
 * The kinds of target identified by GlobalTarget::type.
 */
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
/**
 * Construct the kernel.  The kernels are flagged as uninitialized and
 * needsEnergyParamDerivs starts out false.
 */
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), needsEnergyParamDerivs(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param integrator the CustomIntegrator this kernel will be used for
*/
void initialize(const System& system, const CustomIntegrator& integrator);
/**
* Execute the kernel.
*
* @param context the context in which to execute this kernel
* @param integrator the CustomIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated.
* On exit, this should specify whether the cached forces are valid at the
* end of the step.
*/
void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
/**
* Compute the kinetic energy.
*
* @param context the context in which to execute this kernel
* @param integrator the CustomIntegrator this kernel is being used for
* @param forcesAreValid if the context has been modified since the last time step, this will be
* false to show that cached forces are invalid and must be recalculated.
* On exit, this should specify whether the cached forces are valid at the
* end of the step.
* @return the kinetic energy
*/
double computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
/**
* Get the values of all global variables.
*
* @param context the context in which to execute this kernel
* @param values on exit, this contains the values
*/
void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const;
/**
* Set the values of all global variables.
*
* @param context the context in which to execute this kernel
* @param values a vector containing the values
*/
void setGlobalVariables(ContextImpl& context, const std::vector<double>& values);
/**
* Get the values of a per-DOF variable.
*
* @param context the context in which to execute this kernel
* @param variable the index of the variable to get
* @param values on exit, this contains the values
*/
void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const;
/**
* Set the values of a per-DOF variable.
*
* @param context the context in which to execute this kernel
* @param variable the index of the variable to set
* @param values a vector containing the values
*/
void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
private:
// Nested helper types; they are only forward-declared inside this class.
class ReorderListener;
class GlobalTarget;
class DerivFunction;
// Private helper routines used by the implementation.
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
std::vector<std::pair<std::string, std::string> >& functionNames);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
OpenCLContext& cl;
double energy;
float energyFloat;
int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceGlobalsAreCurrent, modifiesParameters, hasAnyConstraints, needsEnergyParamDerivs;
std::vector<bool> deviceValuesAreCurrent;
// Declared mutable so that const member functions can modify it.
mutable std::vector<bool> localValuesAreCurrent;
OpenCLArray globalValues;
OpenCLArray sumBuffer;
OpenCLArray summedValue;
OpenCLArray uniformRandoms;
OpenCLArray randomSeed;
OpenCLArray perDofEnergyParamDerivs;
std::vector<OpenCLArray> tabulatedFunctions, perDofValues;
std::map<int, double> savedEnergy;
std::map<int, OpenCLArray> savedForces;
std::set<int> validSavedForces;
// Float and double variants of the per-DOF values; mutable so that const member
// functions can modify them.
mutable std::vector<std::vector<mm_float4> > localPerDofValuesFloat;
mutable std::vector<std::vector<mm_double4> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<cl_double> localPerDofEnergyParamDerivs;
std::vector<double> localGlobalValues;
std::vector<double> initialGlobalVariables;
std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
// NOTE(review): the vectors below look like per-step records (one entry per
// integrator step) -- confirm against the implementation file.
std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<CustomIntegratorUtilities::Comparison> comparisons;
std::vector<std::vector<Lepton::CompiledExpression> > globalExpressions;
CompiledExpressionSet expressionSet;
std::vector<bool> needsGlobals;
std::vector<bool> needsForces;
std::vector<bool> needsEnergy;
std::vector<bool> computeBothForceAndEnergy;
std::vector<bool> invalidatesForces;
std::vector<bool> merged;
std::vector<int> forceGroupFlags;
std::vector<int> blockEnd;
std::vector<int> requiredGaussian;
std::vector<int> requiredUniform;
std::vector<int> stepEnergyVariableIndex;
std::vector<int> globalVariableIndex;
std::vector<int> parameterVariableIndex;
int gaussianVariableIndex, uniformVariableIndex, dtVariableIndex;
std::vector<std::string> parameterNames;
std::vector<GlobalTarget> stepTarget;
};
/**
 * A (target type, variable index) pair. Objects of this class are passed to
 * recordGlobalValue() and stored, one per step, in stepTarget.
 */
class OpenCLIntegrateCustomStepKernel::GlobalTarget {
public:
    /** The kind of target being referred to. */
    OpenCLIntegrateCustomStepKernel::GlobalTargetType type;
    /** The index of the variable being referred to; its meaning depends on type. */
    int variableIndex;
    /**
     * Create a GlobalTarget whose fields are value-initialized (zero).
     *
     * Containers of GlobalTarget default-construct their elements when resized,
     * so both members are given a defined value here rather than being left
     * indeterminate until they are assigned.
     */
    GlobalTarget() : type(), variableIndex(0) {
    }
    /**
     * Create a GlobalTarget.
     *
     * @param type           the kind of target
     * @param variableIndex  the index of the variable being referred to
     */
    GlobalTarget(OpenCLIntegrateCustomStepKernel::GlobalTargetType type, int variableIndex) : type(type), variableIndex(variableIndex) {
    }
};
/**
 * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
 */
class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
public:
    /**
     * Create the kernel. No device kernels are marked as initialized at this point,
     * and the random seed starts at 0 so that it never holds an indeterminate value.
     *
     * @param name      the name of the kernel, forwarded to the base class
     * @param platform  the Platform this kernel belongs to, forwarded to the base class
     * @param cl        the OpenCLContext this kernel will use
     */
    OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
            hasInitializedKernels(false), randomSeed(0) {
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param thermostat the AndersenThermostat this kernel will be used for
     */
    void initialize(const System& system, const AndersenThermostat& thermostat);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     */
    void execute(ContextImpl& context);
private:
    OpenCLContext& cl;
    // False until the device kernel has been set up.
    bool hasInitializedKernels;
    // Presumably assigned from the thermostat in initialize() -- confirm against the implementation.
    int randomSeed;
    OpenCLArray atomGroups;
    cl::Kernel kernel;
};
/** /**
* This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat * This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
* and update the associated particle velocities. * and update the associated particle velocities.
...@@ -1760,7 +478,7 @@ public: ...@@ -1760,7 +478,7 @@ public:
/** /**
* Initialize the kernel. * Initialize the kernel.
*/ */
virtual void initialize(); void initialize();
/** /**
* Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor. * Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
* *
...@@ -1770,7 +488,7 @@ public: ...@@ -1770,7 +488,7 @@ public:
* @param timeStep the time step used by the integrator. * @param timeStep the time step used by the integrator.
* @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath. * @return the {absolute, relative} velocity scale factor to apply to the particles associated with this heat bath.
*/ */
virtual std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep); std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergies, double timeStep);
/** /**
* Execute the kernel that computes the total (kinetic + potential) heat bath energy. * Execute the kernel that computes the total (kinetic + potential) heat bath energy.
* *
...@@ -1778,7 +496,7 @@ public: ...@@ -1778,7 +496,7 @@ public:
* @param noseHooverChain the chain whose energy is to be determined. * @param noseHooverChain the chain whose energy is to be determined.
* @return the total heat bath energy. * @return the total heat bath energy.
*/ */
virtual double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc); double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
/** /**
* Execute the kernel that computes the kinetic energy for a subset of atoms, * Execute the kernel that computes the kinetic energy for a subset of atoms,
* or the relative kinetic energy of Drude particles with respect to their parent atoms * or the relative kinetic energy of Drude particles with respect to their parent atoms
...@@ -1788,7 +506,7 @@ public: ...@@ -1788,7 +506,7 @@ public:
* @param downloadValue whether the computed value should be downloaded and returned. * @param downloadValue whether the computed value should be downloaded and returned.
* *
*/ */
virtual std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue); std::pair<double,double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
/** /**
* Execute the kernel that scales the velocities of particles associated with a nose hoover chain * Execute the kernel that scales the velocities of particles associated with a nose hoover chain
...@@ -1797,7 +515,7 @@ public: ...@@ -1797,7 +515,7 @@ public:
* @param noseHooverChain the chain whose energy is to be determined. * @param noseHooverChain the chain whose energy is to be determined.
* @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled. * @param scaleFactors the {absolute, relative} multiplicative factor by which velocities are scaled.
*/ */
virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors); void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactors);
private: private:
int sumWorkGroupSize; int sumWorkGroupSize;
...@@ -1813,7 +531,6 @@ private: ...@@ -1813,7 +531,6 @@ private:
cl::Kernel scalePairsVelocitiesKernel; cl::Kernel scalePairsVelocitiesKernel;
}; };
/** /**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/ */
...@@ -1861,33 +578,6 @@ private: ...@@ -1861,33 +578,6 @@ private:
std::vector<int> lastAtomOrder; std::vector<int> lastAtomOrder;
}; };
/**
 * This kernel is invoked to remove center of mass motion from the system.
 */
class OpenCLRemoveCMMotionKernel : public RemoveCMMotionKernel {
public:
    /**
     * Create the kernel. The frequency starts at 0 so that it never holds an
     * indeterminate value.
     *
     * @param name      the name of the kernel, forwarded to the base class
     * @param platform  the Platform this kernel belongs to, forwarded to the base class
     * @param cl        the OpenCLContext this kernel will use
     */
    OpenCLRemoveCMMotionKernel(std::string name, const Platform& platform, OpenCLContext& cl) : RemoveCMMotionKernel(name, platform), cl(cl),
            frequency(0) {
    }
    /**
     * Initialize the kernel, setting up the particle masses.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CMMotionRemover this kernel will be used for
     */
    void initialize(const System& system, const CMMotionRemover& force);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     */
    void execute(ContextImpl& context);
private:
    OpenCLContext& cl;
    // Presumably assigned from the CMMotionRemover in initialize() -- confirm against the implementation.
    int frequency;
    OpenCLArray cmMomentum;
    cl::Kernel kernel1, kernel2;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_OPENCLKERNELS_H_*/ #endif /*OPENMM_OPENCLKERNELS_H_*/
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -27,15 +27,18 @@ ...@@ -27,15 +27,18 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "OpenCLArray.h"
#include "OpenCLExpressionUtilities.h" #include "OpenCLExpressionUtilities.h"
#include "openmm/common/NonbondedUtilities.h"
#include <cl.hpp>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
namespace OpenMM { namespace OpenMM {
class OpenCLContext;
class OpenCLSort; class OpenCLSort;
/** /**
...@@ -63,7 +66,7 @@ class OpenCLSort; ...@@ -63,7 +66,7 @@ class OpenCLSort;
* by ForceImpls during calcForcesAndEnergy(). * by ForceImpls during calcForcesAndEnergy().
*/ */
class OPENMM_EXPORT_OPENCL OpenCLNonbondedUtilities { class OPENMM_EXPORT_COMMON OpenCLNonbondedUtilities : public NonbondedUtilities {
public: public:
class ParameterInfo; class ParameterInfo;
OpenCLNonbondedUtilities(OpenCLContext& context); OpenCLNonbondedUtilities(OpenCLContext& context);
...@@ -83,10 +86,22 @@ public: ...@@ -83,10 +86,22 @@ public:
/** /**
* Add a per-atom parameter that the default interaction kernel may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*/ */
void addParameter(ComputeParameterInfo parameter);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*
* @deprecated Use the version that takes a ComputeParameterInfo instead.
*/
void addParameter(const ParameterInfo& parameter); void addParameter(const ParameterInfo& parameter);
/** /**
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel. * Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/ */
void addArgument(ComputeParameterInfo parameter);
/**
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*
* @deprecated Use the version that takes a ComputeParameterInfo instead.
*/
void addArgument(const ParameterInfo& parameter); void addArgument(const ParameterInfo& parameter);
/** /**
* Register that the interaction kernel will be computing the derivative of the potential energy * Register that the interaction kernel will be computing the derivative of the potential energy
...@@ -110,7 +125,7 @@ public: ...@@ -110,7 +125,7 @@ public:
/** /**
* Get the number of force buffers required for nonbonded forces. * Get the number of force buffers required for nonbonded forces.
*/ */
int getNumForceBuffers() { int getNumForceBuffers() const {
return numForceBuffers; return numForceBuffers;
} }
/** /**
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "OpenCLPlatform.h" #include "OpenCLPlatform.h"
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLKernels.h" #include "OpenCLKernels.h"
#include "openmm/common/CommonKernels.h"
namespace OpenMM { namespace OpenMM {
...@@ -97,8 +98,8 @@ private: ...@@ -97,8 +98,8 @@ private:
class OpenCLParallelCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel { class OpenCLParallelCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public: public:
OpenCLParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcHarmonicBondForceKernel& getKernel(int index) { CommonCalcHarmonicBondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcHarmonicBondForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcHarmonicBondForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -135,8 +136,8 @@ private: ...@@ -135,8 +136,8 @@ private:
class OpenCLParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel { class OpenCLParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public: public:
OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomBondForceKernel& getKernel(int index) { CommonCalcCustomBondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomBondForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomBondForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -173,8 +174,8 @@ private: ...@@ -173,8 +174,8 @@ private:
class OpenCLParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel { class OpenCLParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public: public:
OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcHarmonicAngleForceKernel& getKernel(int index) { CommonCalcHarmonicAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcHarmonicAngleForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcHarmonicAngleForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -211,8 +212,8 @@ private: ...@@ -211,8 +212,8 @@ private:
class OpenCLParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel { class OpenCLParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public: public:
OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomAngleForceKernel& getKernel(int index) { CommonCalcCustomAngleForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomAngleForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomAngleForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -249,8 +250,8 @@ private: ...@@ -249,8 +250,8 @@ private:
class OpenCLParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel { class OpenCLParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public: public:
OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcPeriodicTorsionForceKernel& getKernel(int index) { CommonCalcPeriodicTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -287,8 +288,8 @@ private: ...@@ -287,8 +288,8 @@ private:
class OpenCLParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel { class OpenCLParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public: public:
OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcRBTorsionForceKernel& getKernel(int index) { CommonCalcRBTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcRBTorsionForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcRBTorsionForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -325,8 +326,8 @@ private: ...@@ -325,8 +326,8 @@ private:
class OpenCLParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel { class OpenCLParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public: public:
OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCMAPTorsionForceKernel& getKernel(int index) { CommonCalcCMAPTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCMAPTorsionForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCMAPTorsionForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -363,8 +364,8 @@ private: ...@@ -363,8 +364,8 @@ private:
class OpenCLParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel { class OpenCLParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public: public:
OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomTorsionForceKernel& getKernel(int index) { CommonCalcCustomTorsionForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomTorsionForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomTorsionForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -459,8 +460,8 @@ private: ...@@ -459,8 +460,8 @@ private:
class OpenCLParallelCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class OpenCLParallelCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public: public:
OpenCLParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomNonbondedForceKernel& getKernel(int index) { CommonCalcCustomNonbondedForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomNonbondedForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomNonbondedForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -497,8 +498,8 @@ private: ...@@ -497,8 +498,8 @@ private:
class OpenCLParallelCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel { class OpenCLParallelCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public: public:
OpenCLParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomExternalForceKernel& getKernel(int index) { CommonCalcCustomExternalForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomExternalForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomExternalForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -535,8 +536,8 @@ private: ...@@ -535,8 +536,8 @@ private:
class OpenCLParallelCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel { class OpenCLParallelCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public: public:
OpenCLParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomHbondForceKernel& getKernel(int index) { CommonCalcCustomHbondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomHbondForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomHbondForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -573,8 +574,8 @@ private: ...@@ -573,8 +574,8 @@ private:
class OpenCLParallelCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel { class OpenCLParallelCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public: public:
OpenCLParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system); OpenCLParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system);
OpenCLCalcCustomCompoundBondForceKernel& getKernel(int index) { CommonCalcCustomCompoundBondForceKernel& getKernel(int index) {
return dynamic_cast<OpenCLCalcCustomCompoundBondForceKernel&>(kernels[index].getImpl()); return dynamic_cast<CommonCalcCustomCompoundBondForceKernel&>(kernels[index].getImpl());
} }
/** /**
* Initialize the kernel. * Initialize the kernel.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. * * Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -29,18 +29,18 @@ ...@@ -29,18 +29,18 @@
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
#include "openmm/common/ComputeParameterSet.h"
namespace OpenMM { namespace OpenMM {
class OpenCLNonbondedUtilities; class OpenCLNonbondedUtilities;
/** /**
* This class represents a set of floating point parameter values for a set of objects (particles, bonds, etc.). * This class exists for backward compatibility. For most purposes you can use
* It automatically creates an appropriate set of cl::Buffers to hold the parameter values, based * ComputeParameterSet directly instead.
* on the number of parameters required.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLParameterSet { class OPENMM_EXPORT_COMMON OpenCLParameterSet : public ComputeParameterSet {
public: public:
/** /**
* Create an OpenCLParameterSet. * Create an OpenCLParameterSet.
...@@ -54,33 +54,6 @@ public: ...@@ -54,33 +54,6 @@ public:
* @param useDoublePrecision whether values should be stored as single or double precision * @param useDoublePrecision whether values should be stored as single or double precision
*/ */
OpenCLParameterSet(OpenCLContext& context, int numParameters, int numObjects, const std::string& name, bool bufferPerParameter=false, bool useDoublePrecision=false); OpenCLParameterSet(OpenCLContext& context, int numParameters, int numObjects, const std::string& name, bool bufferPerParameter=false, bool useDoublePrecision=false);
~OpenCLParameterSet();
/**
* Get the number of parameters.
*/
int getNumParameters() const {
return numParameters;
}
/**
* Get the number of objects.
*/
int getNumObjects() const {
return numObjects;
}
/**
* Get the values of all parameters.
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
template <class T>
void getParameterValues(std::vector<std::vector<T> >& values) const;
/**
* Set the values of all parameters.
*
* @param values values[i][j] contains the value of parameter j for object i
*/
template <class T>
void setParameterValues(const std::vector<std::vector<T> >& values);
/** /**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers * Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data. * containing the data.
...@@ -95,18 +68,7 @@ public: ...@@ -95,18 +68,7 @@ public:
const std::vector<OpenCLNonbondedUtilities::ParameterInfo>& getBuffers() const { const std::vector<OpenCLNonbondedUtilities::ParameterInfo>& getBuffers() const {
return buffers; return buffers;
} }
/**
* Get a suffix to add to variable names when accessing a certain parameter.
*
* @param index the index of the parameter
* @param extraSuffix an extra suffix to add to the variable name
* @return the suffix to append
*/
std::string getParameterSuffix(int index, const std::string& extraSuffix = "") const;
private: private:
OpenCLContext& context;
int numParameters, numObjects, elementSize;
std::string name;
std::vector<OpenCLNonbondedUtilities::ParameterInfo> buffers; std::vector<OpenCLNonbondedUtilities::ParameterInfo> buffers;
}; };
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "windowsExportOpenCL.h" #include "openmm/common/windowsExportCommon.h"
namespace OpenMM { namespace OpenMM {
class OpenCLContext; class OpenCLContext;
...@@ -39,7 +39,7 @@ class OpenCLContext; ...@@ -39,7 +39,7 @@ class OpenCLContext;
* This Platform subclass uses OpenCL implementations of the OpenMM kernels. * This Platform subclass uses OpenCL implementations of the OpenMM kernels.
*/ */
class OPENMM_EXPORT_OPENCL OpenCLPlatform : public Platform { class OPENMM_EXPORT_COMMON OpenCLPlatform : public Platform {
public: public:
class PlatformData; class PlatformData;
OpenCLPlatform(); OpenCLPlatform();
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
} }
}; };
class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData { class OPENMM_EXPORT_COMMON OpenCLPlatform::PlatformData {
public: public:
PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty, PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads, ContextImpl* originalContext); const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads, ContextImpl* originalContext);
......
#ifndef OPENMM_OPENCLPROGRAM_H_
#define OPENMM_OPENCLPROGRAM_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeProgram.h"
#include "OpenCLContext.h"
namespace OpenMM {
/**
 * OpenCL implementation of the ComputeProgramImpl interface. It holds a
 * compiled cl::Program together with the OpenCLContext it was created for.
 */
class OpenCLProgram : public ComputeProgramImpl {
public:
    /**
     * Construct an OpenCLProgram from an already compiled program.
     *
     * @param context    the OpenCLContext this program belongs to
     * @param program    the compiled OpenCL program
     */
    OpenCLProgram(OpenCLContext& context, cl::Program program);

    /**
     * Look up one of the kernels contained in this program and return it
     * as a ComputeKernel.
     *
     * @param name       the name of the kernel to get
     * @return a ComputeKernel for the named kernel
     */
    ComputeKernel createKernel(const std::string& name);

private:
    OpenCLContext& context;
    cl::Program program;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLPROGRAM_H_*/
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "windowsExportOpenCL.h" #include "openmm/common/windowsExportCommon.h"
namespace OpenMM { namespace OpenMM {
...@@ -66,7 +66,7 @@ namespace OpenMM { ...@@ -66,7 +66,7 @@ namespace OpenMM {
* elements). * elements).
*/ */
class OPENMM_EXPORT_OPENCL OpenCLSort { class OPENMM_EXPORT_COMMON OpenCLSort {
public: public:
class SortTrait; class SortTrait;
/** /**
......
# #
# Include OpenCL related files. # Include OpenCL related files.
# #
# INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/../FindOpenCL.cmake) # INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/../FindOpenCL.cmake)
INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl) FILE(GLOB OPENCL_KERNELS ${KERNEL_SOURCE_DIR}/kernels/*.cl)
ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H} ADD_CUSTOM_COMMAND(OUTPUT ${KERNELS_CPP} ${KERNELS_H}
COMMAND ${CMAKE_COMMAND} COMMAND ${CMAKE_COMMAND}
ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake ARGS -D KERNEL_SOURCE_DIR=${KERNEL_SOURCE_DIR} -D KERNELS_CPP=${KERNELS_CPP} -D KERNELS_H=${KERNELS_H} -D KERNEL_SOURCE_CLASS=${KERNEL_SOURCE_CLASS} -D KERNEL_FILE_EXTENSION=cl -P ${CMAKE_SOURCE_DIR}/cmake_modules/EncodeKernelFiles.cmake
DEPENDS ${OPENCL_KERNELS} DEPENDS ${OPENCL_KERNELS}
) )
SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE) SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} ${COMMON_KERNELS_CPP} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
ADD_DEPENDENCIES(${SHARED_TARGET} CommonKernels)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY") TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_COMMON_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2018 Stanford University and the Authors. * * Portions copyright (c) 2012-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -48,6 +48,10 @@ OpenCLArray::~OpenCLArray() { ...@@ -48,6 +48,10 @@ OpenCLArray::~OpenCLArray() {
delete buffer; delete buffer;
} }
/**
 * Initialize this array through the platform-independent interface.
 *
 * The generic context is downcast to an OpenCLContext (the reference form of
 * dynamic_cast throws std::bad_cast if it is some other kind of context), and
 * the work is forwarded to the OpenCL-specific overload, requesting a buffer
 * with CL_MEM_READ_WRITE flags.
 *
 * @param context      the context the array belongs to; must be an OpenCLContext
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array
 */
void OpenCLArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
    OpenCLContext& clContext = dynamic_cast<OpenCLContext&>(context);
    initialize(clContext, size, elementSize, name, CL_MEM_READ_WRITE);
}
void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) { void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) {
if (buffer != NULL) if (buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized"); throw OpenMMException("OpenCLArray has already been initialized");
...@@ -88,6 +92,10 @@ void OpenCLArray::resize(int size) { ...@@ -88,6 +92,10 @@ void OpenCLArray::resize(int size) {
initialize(*context, size, elementSize, name, flags); initialize(*context, size, elementSize, name, flags);
} }
/**
 * Get the context this array belongs to.
 *
 * @return the context that was supplied when the array was initialized
 * @throws OpenMMException if the array has not been initialized.  This is the
 *         same check (and message) the other accessors in this class use, and
 *         it replaces a dereference of a context pointer that has not been set.
 */
ComputeContext& OpenCLArray::getContext() {
    if (buffer == NULL)
        throw OpenMMException("OpenCLArray has not been initialized");
    return *context;
}
void OpenCLArray::upload(const void* data, bool blocking) { void OpenCLArray::upload(const void* data, bool blocking) {
if (buffer == NULL) if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized"); throw OpenMMException("OpenCLArray has not been initialized");
...@@ -114,13 +122,14 @@ void OpenCLArray::download(void* data, bool blocking) const { ...@@ -114,13 +122,14 @@ void OpenCLArray::download(void* data, bool blocking) const {
} }
} }
void OpenCLArray::copyTo(OpenCLArray& dest) const { void OpenCLArray::copyTo(ArrayInterface& dest) const {
if (buffer == NULL) if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized"); throw OpenMMException("OpenCLArray has not been initialized");
if (dest.getSize() != size || dest.getElementSize() != elementSize) if (dest.getSize() != size || dest.getElementSize() != elementSize)
throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array"); throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
OpenCLArray& clDest = context->unwrap(dest);
try { try {
context->getQueue().enqueueCopyBuffer(*buffer, dest.getDeviceBuffer(), 0, 0, size*elementSize); context->getQueue().enqueueCopyBuffer(*buffer, clDest.getDeviceBuffer(), 0, 0, size*elementSize);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. * * Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLBondedUtilities.h" #include "OpenCLBondedUtilities.h"
#include "OpenCLContext.h"
#include "OpenCLExpressionUtilities.h" #include "OpenCLExpressionUtilities.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
...@@ -55,6 +56,10 @@ string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) ...@@ -55,6 +56,10 @@ string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type)
return "customArg"+context.intToString(arguments.size()); return "customArg"+context.intToString(arguments.size());
} }
/**
 * Add a kernel argument given as a platform-independent array.
 *
 * The array is resolved to the underlying OpenCLArray by the context, and its
 * device buffer is passed on to the cl::Memory overload of addArgument().
 *
 * @param data  the array to pass to the kernel
 * @param type  the data type of the argument
 * @return the value returned by the cl::Memory overload for this argument
 */
string OpenCLBondedUtilities::addArgument(ArrayInterface& data, const string& type) {
    auto& clArray = context.unwrap(data);
    return addArgument(clArray.getDeviceBuffer(), type);
}
string OpenCLBondedUtilities::addEnergyParameterDerivative(const string& param) { string OpenCLBondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added. // See if the parameter has already been added.
......
...@@ -31,11 +31,13 @@ ...@@ -31,11 +31,13 @@
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLBondedUtilities.h" #include "OpenCLBondedUtilities.h"
#include "OpenCLEvent.h"
#include "OpenCLForceInfo.h" #include "OpenCLForceInfo.h"
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
#include "hilbert.h" #include "OpenCLProgram.h"
#include "openmm/common/ComputeArray.h"
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/VirtualSite.h" #include "openmm/VirtualSite.h"
...@@ -68,8 +70,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -68,8 +70,8 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
} }
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), hasAssignedPosqCharges(false), ComputeContext(system), platformData(platformData), numForceBuffers(0), hasAssignedPosqCharges(false),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
useMixedPrecision = false; useMixedPrecision = false;
...@@ -278,7 +280,13 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -278,7 +280,13 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm");
compilationDefines["USE_DOUBLE_PRECISION"] = "1"; compilationDefines["USE_DOUBLE_PRECISION"] = "1";
compilationDefines["convert_real4"] = "convert_double4"; compilationDefines["convert_real4"] = "convert_double4";
compilationDefines["make_real2"] = "make_double2";
compilationDefines["make_real3"] = "make_double3";
compilationDefines["make_real4"] = "make_double4";
compilationDefines["convert_mixed4"] = "convert_double4"; compilationDefines["convert_mixed4"] = "convert_double4";
compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4";
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq"); posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq");
...@@ -286,14 +294,27 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -286,14 +294,27 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_double4>(*this, paddedNumAtoms, "velm");
compilationDefines["USE_MIXED_PRECISION"] = "1"; compilationDefines["USE_MIXED_PRECISION"] = "1";
compilationDefines["convert_real4"] = "convert_float4"; compilationDefines["convert_real4"] = "convert_float4";
compilationDefines["make_real2"] = "make_float2";
compilationDefines["make_real3"] = "make_float3";
compilationDefines["make_real4"] = "make_float4";
compilationDefines["convert_mixed4"] = "convert_double4"; compilationDefines["convert_mixed4"] = "convert_double4";
compilationDefines["make_mixed2"] = "make_double2";
compilationDefines["make_mixed3"] = "make_double3";
compilationDefines["make_mixed4"] = "make_double4";
} }
else { else {
posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq"); posq.initialize<mm_float4>(*this, paddedNumAtoms, "posq");
velm.initialize<mm_float4>(*this, paddedNumAtoms, "velm"); velm.initialize<mm_float4>(*this, paddedNumAtoms, "velm");
compilationDefines["convert_real4"] = "convert_float4"; compilationDefines["convert_real4"] = "convert_float4";
compilationDefines["make_real2"] = "make_float2";
compilationDefines["make_real3"] = "make_float3";
compilationDefines["make_real4"] = "make_float4";
compilationDefines["convert_mixed4"] = "convert_float4"; compilationDefines["convert_mixed4"] = "convert_float4";
compilationDefines["make_mixed2"] = "make_float2";
compilationDefines["make_mixed3"] = "make_float3";
compilationDefines["make_mixed4"] = "make_float4";
} }
longForceBuffer.initialize<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0));
atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice"); atomIndexDevice.initialize<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms); atomIndex.resize(paddedNumAtoms);
...@@ -317,8 +338,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -317,8 +338,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFiveBuffersKernel = cl::Kernel(utilities, "clearFiveBuffers"); clearFiveBuffersKernel = cl::Kernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = cl::Kernel(utilities, "clearSixBuffers"); clearSixBuffersKernel = cl::Kernel(utilities, "clearSixBuffers");
reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer"); reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
if (supports64BitGlobalAtomics) reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
reduceEnergyKernel = cl::Kernel(utilities, "reduceEnergy"); reduceEnergyKernel = cl::Kernel(utilities, "reduceEnergy");
setChargesKernel = cl::Kernel(utilities, "setCharges"); setChargesKernel = cl::Kernel(utilities, "setCharges");
...@@ -361,6 +381,13 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -361,6 +381,13 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["EXP"] = "exp"; compilationDefines["EXP"] = "exp";
compilationDefines["LOG"] = "log"; compilationDefines["LOG"] = "log";
} }
compilationDefines["POW"] = "pow";
compilationDefines["COS"] = "cos";
compilationDefines["SIN"] = "sin";
compilationDefines["TAN"] = "tan";
compilationDefines["ACOS"] = "acos";
compilationDefines["ASIN"] = "asin";
compilationDefines["ATAN"] = "atan";
// Set defines for applying periodic boundary conditions. // Set defines for applying periodic boundary conditions.
...@@ -410,10 +437,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -410,10 +437,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"; "pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}";
} }
// Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread();
// Create utilities objects. // Create utilities objects.
bonded = new OpenCLBondedUtilities(*this); bonded = new OpenCLBondedUtilities(*this);
...@@ -441,16 +464,13 @@ OpenCLContext::~OpenCLContext() { ...@@ -441,16 +464,13 @@ OpenCLContext::~OpenCLContext() {
delete bonded; delete bonded;
if (nonbonded != NULL) if (nonbonded != NULL)
delete nonbonded; delete nonbonded;
if (thread != NULL)
delete thread;
} }
void OpenCLContext::initialize() { void OpenCLContext::initialize() {
bonded->initialize(system); bonded->initialize(system);
numForceBuffers = platformData.contexts.size(); numForceBuffers = std::max(numForceBuffers, (int) platformData.contexts.size());
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers()); numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (auto force : forces) numForceBuffers = std::max(numForceBuffers, nonbonded->getNumForceBuffers());
numForceBuffers = std::max(numForceBuffers, force->getRequiredForceBuffers());
int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
forceBuffers.initialize<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers.initialize<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
...@@ -470,14 +490,12 @@ void OpenCLContext::initialize() { ...@@ -470,14 +490,12 @@ void OpenCLContext::initialize() {
energyBuffer.initialize<cl_float>(*this, energyBufferSize, "energyBuffer"); energyBuffer.initialize<cl_float>(*this, energyBufferSize, "energyBuffer");
energySum.initialize<cl_float>(*this, 1, "energySum"); energySum.initialize<cl_float>(*this, 1, "energySum");
} }
if (supports64BitGlobalAtomics) { reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer.getDeviceBuffer());
longForceBuffer.initialize<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer"); reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers.getDeviceBuffer());
reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer.getDeviceBuffer()); reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms);
reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers.getDeviceBuffer()); reduceForcesKernel.setArg<cl_int>(3, numForceBuffers);
reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms); if (supports64BitGlobalAtomics)
reduceForcesKernel.setArg<cl_int>(3, numForceBuffers);
addAutoclearBuffer(longForceBuffer); addAutoclearBuffer(longForceBuffer);
}
addAutoclearBuffer(forceBuffers); addAutoclearBuffer(forceBuffers);
addAutoclearBuffer(energyBuffer); addAutoclearBuffer(energyBuffer);
int numEnergyParamDerivs = energyParamDerivNames.size(); int numEnergyParamDerivs = energyParamDerivNames.size();
...@@ -503,44 +521,19 @@ void OpenCLContext::initialize() { ...@@ -503,44 +521,19 @@ void OpenCLContext::initialize() {
nonbonded->initialize(system); nonbonded->initialize(system);
} }
void OpenCLContext::addForce(OpenCLForceInfo* force) { void OpenCLContext::initializeContexts() {
forces.push_back(force); getPlatformData().initializeContexts(system);
} }
vector<OpenCLForceInfo*>& OpenCLContext::getForceInfos() { void OpenCLContext::addForce(ComputeForceInfo* force) {
return forces; ComputeContext::addForce(force);
OpenCLForceInfo* clinfo = dynamic_cast<OpenCLForceInfo*>(force);
if (clinfo != NULL)
requestForceBuffers(clinfo->getRequiredForceBuffers());
} }
string OpenCLContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const { void OpenCLContext::requestForceBuffers(int minBuffers) {
static set<char> symbolChars; numForceBuffers = std::max(numForceBuffers, minBuffers);
if (symbolChars.size() == 0) {
symbolChars.insert('_');
for (char c = 'a'; c <= 'z'; c++)
symbolChars.insert(c);
for (char c = 'A'; c <= 'Z'; c++)
symbolChars.insert(c);
for (char c = '0'; c <= '9'; c++)
symbolChars.insert(c);
}
string result = input;
for (auto& pair : replacements) {
int index = 0;
int size = pair.first.size();
do {
index = result.find(pair.first, index);
if (index != result.npos) {
if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, pair.second);
index += pair.second.size();
}
else
index++;
}
} while (index != result.npos);
}
return result;
} }
cl::Program OpenCLContext::createProgram(const string source, const char* optimizationFlags) { cl::Program OpenCLContext::createProgram(const string source, const char* optimizationFlags) {
...@@ -586,6 +579,7 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string, ...@@ -586,6 +579,7 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
src << "typedef float3 mixed3;\n"; src << "typedef float3 mixed3;\n";
src << "typedef float4 mixed4;\n"; src << "typedef float4 mixed4;\n";
} }
src << OpenCLKernelSources::common << endl;
for (auto& pair : defines) { for (auto& pair : defines) {
src << "#define " << pair.first; src << "#define " << pair.first;
if (!pair.second.empty()) if (!pair.second.empty())
...@@ -620,19 +614,29 @@ void OpenCLContext::restoreDefaultQueue() { ...@@ -620,19 +614,29 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue; currentQueue = defaultQueue;
} }
string OpenCLContext::doubleToString(double value) const { OpenCLArray* OpenCLContext::createArray() {
stringstream s; return new OpenCLArray();
s.precision(useDoublePrecision ? 16 : 8); }
s << scientific << value;
if (!useDoublePrecision) ComputeEvent OpenCLContext::createEvent() {
s << "f"; return shared_ptr<ComputeEventImpl>(new OpenCLEvent(*this));
return s.str();
} }
string OpenCLContext::intToString(int value) const { ComputeProgram OpenCLContext::compileProgram(const std::string source, const std::map<std::string, std::string>& defines) {
stringstream s; cl::Program program = createProgram(source, defines);
s << value; return shared_ptr<ComputeProgramImpl>(new OpenCLProgram(*this, program));
return s.str(); }
/**
 * Convert a generic array reference into the OpenCLArray behind it.
 *
 * If the argument is a ComputeArray, the array it wraps is examined;
 * otherwise the argument itself is examined.
 *
 * @param array  the array to convert
 * @return the OpenCLArray that the argument is, or wraps
 * @throws OpenMMException if the resolved object is not an OpenCLArray
 */
OpenCLArray& OpenCLContext::unwrap(ArrayInterface& array) const {
    ComputeArray* wrapper = dynamic_cast<ComputeArray*>(&array);
    OpenCLArray* clarray = (wrapper == NULL ? dynamic_cast<OpenCLArray*>(&array)
                                            : dynamic_cast<OpenCLArray*>(&wrapper->getArray()));
    if (clarray != NULL)
        return *clarray;
    throw OpenMMException("Array argument is not an OpenCLArray");
}
void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSize) { void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSize) {
...@@ -649,8 +653,8 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi ...@@ -649,8 +653,8 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
} }
} }
void OpenCLContext::clearBuffer(OpenCLArray& array) { void OpenCLContext::clearBuffer(ArrayInterface& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize()); clearBuffer(unwrap(array).getDeviceBuffer(), array.getSize()*array.getElementSize());
} }
void OpenCLContext::clearBuffer(cl::Memory& memory, int size) { void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
...@@ -660,8 +664,8 @@ void OpenCLContext::clearBuffer(cl::Memory& memory, int size) { ...@@ -660,8 +664,8 @@ void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
executeKernel(clearBufferKernel, words, 128); executeKernel(clearBufferKernel, words, 128);
} }
void OpenCLContext::addAutoclearBuffer(OpenCLArray& array) { void OpenCLContext::addAutoclearBuffer(ArrayInterface& array) {
addAutoclearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize()); addAutoclearBuffer(unwrap(array).getDeviceBuffer(), array.getSize()*array.getElementSize());
} }
void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) { void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) {
...@@ -734,10 +738,7 @@ void OpenCLContext::clearAutoclearBuffers() { ...@@ -734,10 +738,7 @@ void OpenCLContext::clearAutoclearBuffers() {
} }
void OpenCLContext::reduceForces() { void OpenCLContext::reduceForces() {
if (supports64BitGlobalAtomics) executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
else
reduceBuffer(forceBuffers, numForceBuffers);
} }
void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) { void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
...@@ -776,7 +777,7 @@ void OpenCLContext::setCharges(const vector<double>& charges) { ...@@ -776,7 +777,7 @@ void OpenCLContext::setCharges(const vector<double>& charges) {
vector<double> c(numAtoms); vector<double> c(numAtoms);
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
c[i] = charges[i]; c[i] = charges[i];
chargeBuffer.upload(c, true, true); chargeBuffer.upload(c, true);
setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer.getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer.getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(1, posq.getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(1, posq.getDeviceBuffer());
setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice.getDeviceBuffer()); setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice.getDeviceBuffer());
...@@ -790,522 +791,6 @@ bool OpenCLContext::requestPosqCharges() { ...@@ -790,522 +791,6 @@ bool OpenCLContext::requestPosqCharges() {
return allow; return allow;
} }
/**
 * This class ensures that atom reordering doesn't break virtual sites.
 *
 * Every virtual site in the System becomes one particle group made of the
 * site itself followed by the particles it depends on.  Two groups are
 * reported as identical only if the sites have the same dynamic type and the
 * same recorded weights.
 */
class OpenCLContext::VirtualSiteInfo : public OpenCLForceInfo {
public:
    /**
     * Record the type, particles and weights of every virtual site.
     *
     * @param system  the System whose virtual sites should be described
     */
    VirtualSiteInfo(const System& system) : OpenCLForceInfo(0) {
        for (int i = 0; i < system.getNumParticles(); i++) {
            if (!system.isVirtualSite(i))
                continue;
            const VirtualSite& vsite = system.getVirtualSite(i);
            siteTypes.push_back(&typeid(vsite));

            // The group is the site particle followed by the particles it depends on.

            vector<int> particles;
            particles.push_back(i);
            for (int j = 0; j < vsite.getNumParticles(); j++)
                particles.push_back(vsite.getParticle(j));
            siteParticles.push_back(particles);

            // Weights are recorded only for the site types handled below; any
            // other type gets an empty weight list.

            vector<double> weights;
            if (const TwoParticleAverageSite* average2 = dynamic_cast<const TwoParticleAverageSite*>(&vsite)) {
                // A two particle average.

                weights.push_back(average2->getWeight(0));
                weights.push_back(average2->getWeight(1));
            }
            else if (const ThreeParticleAverageSite* average3 = dynamic_cast<const ThreeParticleAverageSite*>(&vsite)) {
                // A three particle average.

                weights.push_back(average3->getWeight(0));
                weights.push_back(average3->getWeight(1));
                weights.push_back(average3->getWeight(2));
            }
            else if (const OutOfPlaneSite* outOfPlane = dynamic_cast<const OutOfPlaneSite*>(&vsite)) {
                // An out of plane site.

                weights.push_back(outOfPlane->getWeight12());
                weights.push_back(outOfPlane->getWeight13());
                weights.push_back(outOfPlane->getWeightCross());
            }
            siteWeights.push_back(weights);
        }
    }
    /**
     * Get the number of particle groups, which is one per virtual site.
     */
    int getNumParticleGroups() {
        return siteTypes.size();
    }
    /**
     * Get the particles in a group: the virtual site followed by the particles it depends on.
     *
     * @param index      the index of the group (virtual site)
     * @param particles  on exit, the indices of the particles in the group
     */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        particles = siteParticles[index];
    }
    /**
     * Report whether two virtual sites have the same type and the same weights.
     *
     * @param group1  the index of the first group
     * @param group2  the index of the second group
     */
    bool areGroupsIdentical(int group1, int group2) {
        // Compare the type_info objects, not their addresses: the same type is
        // not guaranteed to always be described by the same type_info instance.

        if (*siteTypes[group1] != *siteTypes[group2])
            return false;

        // Vector equality checks the lengths first, then each weight with ==.

        return siteWeights[group1] == siteWeights[group2];
    }
private:
    vector<const type_info*> siteTypes;
    vector<vector<int> > siteParticles;
    vector<vector<double> > siteWeights;
};
/**
 * Identify the molecules in the system and sort them into groups of identical molecules.
 *
 * On the first call (when no molecule groups exist yet) this registers a
 * VirtualSiteInfo, builds the connectivity implied by constraints and by the
 * particle groups of every registered force, and splits the atoms into
 * molecules.  On every call it then rebuilds moleculeGroups: molecules are
 * treated as identical when their atoms, masses, constraints and force groups
 * all match up to a constant offset in atom index.
 */
void OpenCLContext::findMoleculeGroups() {
    // The first time this is called, we need to identify all the molecules in the system.

    if (moleculeGroups.size() == 0) {
        // Add a ForceInfo that makes sure reordering doesn't break virtual sites.

        addForce(new VirtualSiteInfo(system));

        // First make a list of every other atom to which each atom is connected by a constraint or force group.

        vector<vector<int> > atomBonds(system.getNumParticles());
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            atomBonds[particle1].push_back(particle2);
            atomBonds[particle2].push_back(particle1);
        }
        for (auto force : forces) {
            for (int j = 0; j < force->getNumParticleGroups(); j++) {
                vector<int> particles;
                force->getParticlesInGroup(j, particles);
                for (int k = 0; k < (int) particles.size(); k++)
                    for (int m = 0; m < (int) particles.size(); m++)
                        if (k != m)
                            atomBonds[particles[k]].push_back(particles[m]);
            }
        }

        // Now identify atoms by which molecule they belong to.

        vector<vector<int> > atomIndices = ContextImpl::findMolecules(numAtoms, atomBonds);
        int numMolecules = atomIndices.size();
        vector<int> atomMolecule(numAtoms);
        for (int i = 0; i < (int) atomIndices.size(); i++)
            for (int j = 0; j < (int) atomIndices[i].size(); j++)
                atomMolecule[atomIndices[i][j]] = i;

        // Construct a description of each molecule.

        molecules.resize(numMolecules);
        for (int i = 0; i < numMolecules; i++) {
            molecules[i].atoms = atomIndices[i];
            molecules[i].groups.resize(forces.size());
        }
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            molecules[atomMolecule[particle1]].constraints.push_back(i);
        }
        for (int i = 0; i < (int) forces.size(); i++)
            for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
                // Each group is filed under the molecule that contains its first particle.

                vector<int> particles;
                forces[i]->getParticlesInGroup(j, particles);
                molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
            }
    }

    // Sort them into groups of identical molecules.

    vector<Molecule> uniqueMolecules;
    vector<vector<int> > moleculeInstances;
    vector<vector<int> > moleculeOffsets;
    for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++) {
        Molecule& mol = molecules[molIndex];

        // See if it is identical to another molecule.

        bool isNew = true;
        for (int j = 0; j < (int) uniqueMolecules.size() && isNew; j++) {
            Molecule& mol2 = uniqueMolecules[j];
            bool identical = (mol.atoms.size() == mol2.atoms.size() && mol.constraints.size() == mol2.constraints.size());

            // See if the atoms are identical.

            int atomOffset = mol2.atoms[0]-mol.atoms[0];
            for (int i = 0; i < (int) mol.atoms.size() && identical; i++) {
                if (mol.atoms[i] != mol2.atoms[i]-atomOffset || system.getParticleMass(mol.atoms[i]) != system.getParticleMass(mol2.atoms[i]))
                    identical = false;
                for (int k = 0; k < (int) forces.size(); k++)
                    if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
                        identical = false;
            }

            // See if the constraints are identical.

            for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
                int c1particle1, c1particle2, c2particle1, c2particle2;
                double distance1, distance2;
                system.getConstraintParameters(mol.constraints[i], c1particle1, c1particle2, distance1);
                system.getConstraintParameters(mol2.constraints[i], c2particle1, c2particle2, distance2);
                if (c1particle1 != c2particle1-atomOffset || c1particle2 != c2particle2-atomOffset || distance1 != distance2)
                    identical = false;
            }

            // See if the force groups are identical.

            for (int i = 0; i < (int) forces.size() && identical; i++) {
                if (mol.groups[i].size() != mol2.groups[i].size())
                    identical = false;
                for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) {
                    if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k]))
                        identical = false;
                    vector<int> p1, p2;
                    forces[i]->getParticlesInGroup(mol.groups[i][k], p1);
                    forces[i]->getParticlesInGroup(mol2.groups[i][k], p2);

                    // Groups with different numbers of particles can never match.  Checking
                    // this first also keeps the comparison below from reading past the end
                    // of the shorter list.

                    if (p1.size() != p2.size())
                        identical = false;
                    for (int m = 0; m < (int) p1.size() && identical; m++)
                        if (p1[m] != p2[m]-atomOffset)
                            identical = false;
                }
            }
            if (identical) {
                moleculeInstances[j].push_back(molIndex);
                moleculeOffsets[j].push_back(mol.atoms[0]);
                isNew = false;
            }
        }
        if (isNew) {
            // Start a new group whose only instance so far is this molecule.

            uniqueMolecules.push_back(mol);
            moleculeInstances.push_back(vector<int>(1, molIndex));
            moleculeOffsets.push_back(vector<int>(1, mol.atoms[0]));
        }
    }

    // Record each group, storing atom indices relative to the first atom of the molecule.

    moleculeGroups.resize(moleculeInstances.size());
    for (int i = 0; i < (int) moleculeInstances.size(); i++)
    {
        moleculeGroups[i].instances = moleculeInstances[i];
        moleculeGroups[i].offsets = moleculeOffsets[i];
        vector<int>& atoms = uniqueMolecules[i].atoms;
        moleculeGroups[i].atoms.resize(atoms.size());
        for (int j = 0; j < (int) atoms.size(); j++)
            moleculeGroups[i].atoms[j] = atoms[j]-atoms[0];
    }
}
/**
 * Run the per-force molecule validity check on every registered force,
 * stopping as soon as one of the calls returns true.
 */
void OpenCLContext::invalidateMolecules() {
    // Index based on purpose: the size of the list is re-read on every pass.
    int index = 0;
    while (index < forces.size()) {
        if (invalidateMolecules(forces[index]))
            break;
        index++;
    }
}
/**
 * Check whether the cached grouping of identical molecules is still consistent with
 * the given force, and rebuild the grouping if it is not.
 *
 * Every instance in each molecule group is compared against the first instance of that
 * group: all of its atoms must be reported identical by the force, and (when the force is
 * one of the entries in `forces`) all of its force groups must be identical too.  The
 * comparison is split across the thread pool.
 *
 * If any mismatch is found, the atoms are put back into their original order, the molecule
 * groups are recomputed, the reorder listeners are notified, and reorderAtoms() is called.
 *
 * @param force  the force whose particle and group definitions should be checked
 * @return true if the molecule groups were invalid and have been rebuilt, false if nothing
 *         needed to change (or if reordering is not in use at all)
 */
bool OpenCLContext::invalidateMolecules(OpenCLForceInfo* force) {
    if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
        return false;
    // Shared by all worker threads.  It is only ever changed from true to false.
    bool valid = true;
    int forceIndex = -1;
    for (int i = 0; i < (int) forces.size(); i++)
        if (forces[i] == force)
            forceIndex = i;
    getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
        for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
            MoleculeGroup& mol = moleculeGroups[group];
            vector<int>& instances = mol.instances;
            vector<int>& offsets = mol.offsets;
            vector<int>& atoms = mol.atoms;
            int numMolecules = instances.size();
            Molecule& m1 = molecules[instances[0]];
            int offset1 = offsets[0];
            int numThreads = threads.getNumThreads();
            // Instance 0 is the reference every other instance is compared to, so no
            // thread starts below index 1.
            int start = max(1, threadIndex*numMolecules/numThreads);
            int end = (threadIndex+1)*numMolecules/numThreads;
            for (int j = start; j < end; j++) {
                // See if the atoms are identical.
                Molecule& m2 = molecules[instances[j]];
                int offset2 = offsets[j];
                for (int i = 0; i < (int) atoms.size(); i++) {
                    if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
                        valid = false;
                }
                // See if the force groups are identical.
                if (valid && forceIndex > -1) {
                    for (int k = 0; k < (int) m1.groups[forceIndex].size(); k++)
                        if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
                            valid = false;
                }
            }
        }
    });
    getPlatformData().threads.waitForThreads();
    if (valid)
        return false;
    // The list of which molecules are identical is no longer valid.  We need to restore the
    // atoms to their original order, rebuild the list of identical molecules, and sort them
    // again.
    vector<mm_int4> newCellOffsets(numAtoms);
    if (useDoublePrecision) {
        vector<mm_double4> oldPosq(paddedNumAtoms);
        vector<mm_double4> newPosq(paddedNumAtoms, mm_double4(0,0,0,0));
        vector<mm_double4> oldVelm(paddedNumAtoms);
        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
        posq.download(oldPosq);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        velm.upload(newVelm);
    }
    else if (useMixedPrecision) {
        vector<mm_float4> oldPosq(paddedNumAtoms);
        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_float4> oldPosqCorrection(paddedNumAtoms);
        vector<mm_float4> newPosqCorrection(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_double4> oldVelm(paddedNumAtoms);
        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
        posq.download(oldPosq);
        // The correction terms must be fetched from the device before they are permuted.
        // Otherwise the upload below would overwrite them with whatever the freshly
        // allocated host vector happens to contain.
        posqCorrection.download(oldPosqCorrection);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newPosqCorrection[index] = oldPosqCorrection[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        posqCorrection.upload(newPosqCorrection);
        velm.upload(newVelm);
    }
    else {
        vector<mm_float4> oldPosq(paddedNumAtoms);
        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_float4> oldVelm(paddedNumAtoms);
        vector<mm_float4> newVelm(paddedNumAtoms, mm_float4(0,0,0,0));
        posq.download(oldPosq);
        velm.download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        posq.upload(newPosq);
        velm.upload(newVelm);
    }
    // Every atom is now back at its original index.
    for (int i = 0; i < numAtoms; i++) {
        atomIndex[i] = i;
        posCellOffsets[i] = newCellOffsets[i];
    }
    atomIndexDevice.upload(atomIndex);
    findMoleculeGroups();
    for (auto listener : reorderListeners)
        listener->execute();
    reorderAtoms();
    return true;
}
/**
 * Reorder the atoms if reordering applies and at least 250 calls have gone by since the
 * previous reorder; otherwise just count this call.  atomsWereReordered records whether
 * a reorder was performed by this call.
 */
void OpenCLContext::reorderAtoms() {
    atomsWereReordered = false;
    const bool shouldReorder = (numAtoms != 0 && nonbonded != NULL && nonbonded->getUseCutoff() && stepsSinceReorder >= 250);
    if (!shouldReorder) {
        stepsSinceReorder++;
        return;
    }
    stepsSinceReorder = 0;
    atomsWereReordered = true;

    // Pick the instantiation that matches the precision mode in use.
    if (useDoublePrecision) {
        reorderAtomsImpl<cl_double, mm_double4, cl_double, mm_double4>();
        return;
    }
    if (useMixedPrecision) {
        reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
        return;
    }
    reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
}
/**
 * Sort the molecules of every molecule group spatially and permute the per-atom data
 * (posq, posqCorrection in mixed precision, velm, atomIndex, posCellOffsets) to match.
 *
 * Molecules are only exchanged with other instances of the same group: the molecule that
 * sorts into position i is written to the atom slots of instance i of that group.
 * When the nonbonded utilities report periodic boundaries, each molecule is first
 * translated so that its center lies inside the periodic box, and posCellOffsets is
 * adjusted by the number of cells it was moved.
 *
 * After the new arrays have been uploaded, every registered reorder listener is executed.
 *
 * Template parameters:
 *   Real    scalar type used for positions and bin arithmetic
 *   Real4   element type of posq and posqCorrection
 *   Mixed   not referenced in this function body
 *   Mixed4  element type of velm
 *
 * Throws OpenMMException if the x coordinate of a molecule center is NaN.
 */
template <class Real, class Real4, class Mixed, class Mixed4>
void OpenCLContext::reorderAtomsImpl() {
// Find the range of positions and the number of bins along each axis.
vector<Real4> oldPosq(paddedNumAtoms);
vector<Real4> oldPosqCorrection(paddedNumAtoms);
vector<Mixed4> oldVelm(paddedNumAtoms);
posq.download(oldPosq);
velm.download(oldVelm);
if (useMixedPrecision)
posqCorrection.download(oldPosqCorrection);
Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
if (nonbonded->getUsePeriodic()) {
// Periodic: the range is the box itself, starting at the origin.
minx = miny = minz = 0.0;
maxx = periodicBoxSizeDouble.x;
maxy = periodicBoxSizeDouble.y;
maxz = periodicBoxSizeDouble.z;
}
else {
// Not periodic: use the bounding box of all atom positions.
for (int i = 1; i < numAtoms; i++) {
const Real4& pos = oldPosq[i];
minx = min(minx, pos.x);
maxx = max(maxx, pos.x);
miny = min(miny, pos.y);
maxy = max(maxy, pos.y);
minz = min(minz, pos.z);
maxz = max(maxz, pos.z);
}
}
// Loop over each group of identical molecules and reorder them.
vector<int> originalIndex(numAtoms);
vector<Real4> newPosq(paddedNumAtoms, Real4(0,0,0,0));
vector<Real4> newPosqCorrection(paddedNumAtoms, Real4(0,0,0,0));
vector<Mixed4> newVelm(paddedNumAtoms, Mixed4(0,0,0,0));
vector<mm_int4> newCellOffsets(numAtoms);
for (auto& mol : moleculeGroups) {
// Find the center of each molecule.
int numMolecules = mol.offsets.size();
vector<int>& atoms = mol.atoms;
vector<Real4> molPos(numMolecules);
Real invNumAtoms = (Real) (1.0/atoms.size());
for (int i = 0; i < numMolecules; i++) {
molPos[i].x = 0.0f;
molPos[i].y = 0.0f;
molPos[i].z = 0.0f;
for (int j = 0; j < (int)atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
const Real4& pos = oldPosq[atom];
molPos[i].x += pos.x;
molPos[i].y += pos.y;
molPos[i].z += pos.z;
}
molPos[i].x *= invNumAtoms;
molPos[i].y *= invNumAtoms;
molPos[i].z *= invNumAtoms;
// A value compares unequal to itself only when it is NaN.  Only x is tested here.
if (molPos[i].x != molPos[i].x)
throw OpenMMException("Particle coordinate is nan");
}
if (nonbonded->getUsePeriodic()) {
// Move each molecule position into the same box.
for (int i = 0; i < numMolecules; i++) {
Real4 center = molPos[i];
// Wrap along z first, then y, then x.  Each step subtracts a whole number of
// the corresponding box vector, which may also change the lower components.
int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
center.x -= zcell*periodicBoxVecZ.x;
center.y -= zcell*periodicBoxVecZ.y;
center.z -= zcell*periodicBoxVecZ.z;
int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
center.x -= ycell*periodicBoxVecY.x;
center.y -= ycell*periodicBoxVecY.y;
int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
center.x -= xcell*periodicBoxVecX.x;
if (xcell != 0 || ycell != 0 || zcell != 0) {
// The center moved: shift every atom of the molecule by the same displacement
// and record the cell shift in posCellOffsets.
Real dx = molPos[i].x-center.x;
Real dy = molPos[i].y-center.y;
Real dz = molPos[i].z-center.z;
molPos[i] = center;
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
}
}
}
// Select a bin for each molecule, then sort them by bin.
bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
Real binWidth;
if (useHilbert)
binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else
binWidth = (Real) (0.2*nonbonded->getMaxCutoffDistance());
Real invBinWidth = (Real) (1.0/binWidth);
int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
// Each entry is (bin, molecule index); sorting the pairs orders molecules by bin.
vector<pair<int, int> > molBins(numMolecules);
bitmask_t coords[3];
for (int i = 0; i < numMolecules; i++) {
int x = (int) ((molPos[i].x-minx)*invBinWidth);
int y = (int) ((molPos[i].y-miny)*invBinWidth);
int z = (int) ((molPos[i].z-minz)*invBinWidth);
int bin;
if (useHilbert) {
coords[0] = x;
coords[1] = y;
coords[2] = z;
// NOTE(review): presumably 3 dimensions with 8 bits per coordinate, which would match
// the division by 255 above -- confirm against the hilbert_c2i declaration.
bin = (int) hilbert_c2i(3, 8, coords);
}
else {
// Zigzag ordering: the direction along y is flipped when z is odd, and the
// direction along x is flipped when y is odd.
int yodd = y&1;
int zodd = z&1;
bin = z*xbins*ybins;
bin += (zodd ? ybins-y : y)*xbins;
bin += (yodd ? xbins-x : x);
}
molBins[i] = pair<int, int>(bin, i);
}
sort(molBins.begin(), molBins.end());
// Reorder the atoms.
for (int i = 0; i < numMolecules; i++) {
for (int atom : atoms) {
// The molecule that sorted into position i is copied into the slots of instance i.
int oldIndex = mol.offsets[molBins[i].second]+atom;
int newIndex = mol.offsets[i]+atom;
originalIndex[newIndex] = atomIndex[oldIndex];
newPosq[newIndex] = oldPosq[oldIndex];
if (useMixedPrecision)
newPosqCorrection[newIndex] = oldPosqCorrection[oldIndex];
newVelm[newIndex] = oldVelm[oldIndex];
newCellOffsets[newIndex] = posCellOffsets[oldIndex];
}
}
}
// Update the streams.
for (int i = 0; i < numAtoms; i++) {
atomIndex[i] = originalIndex[i];
posCellOffsets[i] = newCellOffsets[i];
}
posq.upload(newPosq);
if (useMixedPrecision)
posqCorrection.upload(newPosqCorrection);
velm.upload(newVelm);
atomIndexDevice.upload(atomIndex);
for (auto listener : reorderListeners)
listener->execute();
}
/**
 * Append a listener to reorderListeners.  The entries of that list are executed, in order,
 * whenever the atoms are reordered.
 */
void OpenCLContext::addReorderListener(ReorderListener* listener) {
    reorderListeners.emplace_back(listener);
}
/**
 * Append a computation to the end of preComputations.
 */
void OpenCLContext::addPreComputation(ForcePreComputation* computation) {
    preComputations.emplace_back(computation);
}
/**
 * Append a computation to the end of postComputations.
 */
void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
    postComputations.emplace_back(computation);
}
void OpenCLContext::addEnergyParameterDerivative(const string& param) { void OpenCLContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered. // See if this parameter has already been registered.
...@@ -1315,85 +800,6 @@ void OpenCLContext::addEnergyParameterDerivative(const string& param) { ...@@ -1315,85 +800,6 @@ void OpenCLContext::addEnergyParameterDerivative(const string& param) {
energyParamDerivNames.push_back(param); energyParamDerivNames.push_back(param);
} }
// State handed to the function run by a WorkThread's thread.  Every member is a
// reference: the WorkThread constructor passes its own task queue, flags, mutex and
// condition variables, so both sides operate on the same objects.  The instance is
// allocated by the WorkThread constructor and deleted by threadBody when it returns.
struct OpenCLContext::WorkThread::ThreadData { void OpenCLContext::flushQueue() {
ThreadData(std::queue<OpenCLContext::WorkTask*>& tasks, bool& waiting, bool& finished, getQueue().flush();
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
tasks(tasks), waiting(waiting), finished(finished), queueLock(queueLock),
waitForTaskCondition(waitForTaskCondition), queueEmptyCondition(queueEmptyCondition) {
}
// Tasks that have been added but not yet started.
std::queue<OpenCLContext::WorkTask*>& tasks;
// Set to true by the thread when it finds the queue empty; cleared when a task is added or taken.
bool& waiting;
// Set to true by the WorkThread destructor to ask the thread to exit.
bool& finished;
// Mutex locked around accesses to the queue.
pthread_mutex_t& queueLock;
// Signaled when a task is added or shutdown is requested.
pthread_cond_t& waitForTaskCondition;
// Signaled by the thread whenever it sets waiting to true.
pthread_cond_t& queueEmptyCondition;
};
/**
 * Function executed by the thread that a WorkThread creates.
 *
 * It repeatedly takes the task at the front of the queue, executes it and deletes it.
 * While the queue is empty it marks itself as waiting, signals queueEmptyCondition and
 * sleeps on waitForTaskCondition.  Once `finished` has been set and the queue has been
 * drained, it marks itself as waiting one last time, frees the ThreadData and returns.
 *
 * All reads and writes of the shared queue and flags happen while queueLock is held; the
 * lock is released only while sleeping on the condition variable and while a task runs.
 *
 * @param args  pointer to a heap-allocated ThreadData; ownership passes to this function
 * @return always 0
 */
static void* threadBody(void* args) {
    OpenCLContext::WorkThread::ThreadData& data = *reinterpret_cast<OpenCLContext::WorkThread::ThreadData*>(args);
    pthread_mutex_lock(&data.queueLock);
    while (true) {
        // Sleep until a task arrives or shutdown is requested.
        while (data.tasks.empty() && !data.finished) {
            data.waiting = true;
            pthread_cond_signal(&data.queueEmptyCondition);
            pthread_cond_wait(&data.waitForTaskCondition, &data.queueLock);
        }
        if (data.tasks.empty())
            break; // Shutdown was requested and nothing is left to run.
        data.waiting = false;
        OpenCLContext::WorkTask* task = data.tasks.front();
        data.tasks.pop();

        // Run the task without holding the lock so that addTask() is not blocked by it.
        pthread_mutex_unlock(&data.queueLock);
        if (task != NULL) {
            task->execute();
            delete task;
        }
        pthread_mutex_lock(&data.queueLock);
    }
    // Publish the final state under the lock so a concurrent flush() cannot miss the signal.
    data.waiting = true;
    pthread_cond_signal(&data.queueEmptyCondition);
    pthread_mutex_unlock(&data.queueLock);
    delete &data;
    return 0;
}
/**
 * Initialize the synchronization primitives and start the worker thread.
 *
 * The thread receives a heap-allocated ThreadData that refers to this object's queue,
 * flags, mutex and condition variables; the thread deletes it when it exits.
 *
 * Throws OpenMMException if the thread cannot be created.  In that case everything that was
 * set up here is released first, because the destructor does not run for an object whose
 * constructor threw.
 */
OpenCLContext::WorkThread::WorkThread() : waiting(true), finished(false) {
    pthread_mutex_init(&queueLock, NULL);
    pthread_cond_init(&waitForTaskCondition, NULL);
    pthread_cond_init(&queueEmptyCondition, NULL);
    ThreadData* data = new ThreadData(tasks, waiting, finished, queueLock, waitForTaskCondition, queueEmptyCondition);
    if (pthread_create(&thread, NULL, threadBody, data) != 0) {
        // No thread exists to take ownership of data or to be joined later.
        delete data;
        pthread_cond_destroy(&queueEmptyCondition);
        pthread_cond_destroy(&waitForTaskCondition);
        pthread_mutex_destroy(&queueLock);
        throw OpenMMException("Failed to create worker thread for OpenCLContext");
    }
}
// Stop the worker thread and release the synchronization primitives.
// The order matters: `finished` is set and the thread is woken while holding the lock,
// the thread is joined, and only then are the mutex and condition variables destroyed.
OpenCLContext::WorkThread::~WorkThread() {
pthread_mutex_lock(&queueLock);
finished = true;
// Wake the thread in case it is sleeping on an empty queue.
pthread_cond_broadcast(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
// threadBody keeps running until the queue is empty, so this waits for queued tasks too.
pthread_join(thread, NULL);
pthread_mutex_destroy(&queueLock);
pthread_cond_destroy(&waitForTaskCondition);
pthread_cond_destroy(&queueEmptyCondition);
}
// Add a task to the end of the queue and wake the worker thread.
// The worker deletes each task after executing it (see threadBody), so the caller must
// not delete the task itself.
void OpenCLContext::WorkThread::addTask(OpenCLContext::WorkTask* task) {
pthread_mutex_lock(&queueLock);
tasks.push(task);
// Cleared here, under the lock, so that a flush() issued after this call blocks
// until the worker has set it to true again.
waiting = false;
pthread_cond_signal(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
}
bool OpenCLContext::WorkThread::isWaiting() {
return waiting;
}
/**
 * Report whether shutdown of the worker thread has been requested.  The flag starts out
 * false and is set to true by the destructor.
 */
bool OpenCLContext::WorkThread::isFinished() {
    const bool shutdownRequested = finished;
    return shutdownRequested;
}
// Block the calling thread until the worker is marked as waiting.
// The worker sets `waiting` and signals queueEmptyCondition when it finds the queue empty.
void OpenCLContext::WorkThread::flush() {
pthread_mutex_lock(&queueLock);
// Loop rather than wait once: the flag is re-checked after every wakeup.
while (!waiting)
pthread_cond_wait(&queueEmptyCondition, &queueLock);
pthread_mutex_unlock(&queueLock);
} }
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLEvent.h"
using namespace OpenMM;
// Create an event tied to the given context.  Only the reference is stored here;
// nothing is placed on the context's queue until enqueue() is called.
OpenCLEvent::OpenCLEvent(OpenCLContext& context) : context(context) {
}
/**
 * Place a marker on the context's command queue and record it in `event`, so that a later
 * call to wait() can refer to it.
 */
void OpenCLEvent::enqueue() {
    auto&& queue = context.getQueue();
    queue.enqueueMarker(&event);
}
// Call wait() on the event that enqueue() recorded.
// NOTE(review): presumably this blocks until the marker placed by enqueue() has been
// reached on the queue -- confirm against the OpenCL C++ bindings in use.
void OpenCLEvent::wait() {
event.wait();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment