Commit 1f7866ad authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1547 from peastman/paramderivs

Energy derivatives with respect to global parameters
parents 37787af9 7851bad8
......@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) {
try {
......@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
delete force;
if (energyBuffer != NULL)
delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
......@@ -390,6 +392,14 @@ void CudaContext::initialize() {
force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = CudaArray::create<double>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
else
energyParamDerivBuffer = CudaArray::create<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
......@@ -1311,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation);
}
void CudaContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered.
for (int i = 0; i < energyParamDerivNames.size(); i++)
if (param == energyParamDerivNames[i])
return;
energyParamDerivNames.push_back(param);
}
struct CudaContext::WorkThread::ThreadData {
ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -146,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter);
}
string CudaNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size());
......@@ -308,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs.push_back(&parameters[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory());
if (energyParameterDerivatives.size() > 0)
forceArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
if (useCutoff) {
findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
......@@ -515,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
args << "* __restrict__ ";
args << arguments[i].getName();
}
if (energyParameterDerivatives.size() > 0)
args << ", mixed* __restrict__ energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
......@@ -623,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
stringstream shuffleWarpData;
if(useShuffle) {
......
......@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g
const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
COMPUTE_FORCE
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
/**
......
......@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
mixed energy = 0;
INIT_PARAM_DERIVS
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions.
......@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
......@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+tj;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
......@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
......@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
......@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos++;
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
......@@ -5,6 +5,7 @@
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives
......@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
......@@ -4,6 +4,7 @@
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
const real scale = RECIP((real) 0x100000000);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index];
......@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000);
}
SAVE_PARAM_DERIVS
}
......@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
......@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (value*0x100000000)));
unsigned int offset1 = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
if (x != y) {
offset = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
unsigned int offset2 = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
}
......@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
......@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd(&global_value[atom1], static_cast<unsigned long long>((long long) (value*0x100000000)));
unsigned int offset1 = atom1;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x];
#else
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
if (atom2 < PADDED_NUM_ATOMS)
atomicAdd(&global_value[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
}
pos++;
}
......
......@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real sum = valueBuffers[index]/(real) 0x100000000;
REDUCE_PARAM0_DERIV
// Now calculate other values
......
......@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
const real energy, mixed* __restrict__ energyParamDerivs
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
......
......@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if (!isExcluded) {
#endif
real tempForce = 0;
COMPUTE_FORCE
real switchValue = 1, switchDeriv = 0;
#if USE_SWITCH
if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
}
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
#endif
dEdR += tempForce*invR;
}
......@@ -113,6 +113,7 @@ extern "C" __global__ void computeNonbonded(
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
mixed energy = 0;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
......@@ -175,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 0.5f;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -243,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -448,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -518,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -586,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
SAVE_DERIVATIVES
}
\ No newline at end of file
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -100,6 +100,15 @@ public:
* refer to it by this name.
*/
std::string addArgument(cl::Memory& data, const std::string& type);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Add some OpenCL code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
......@@ -137,6 +146,7 @@ private:
std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray*> bufferIndices;
std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups;
bool hasInitializedKernels;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -264,6 +264,12 @@ public:
OpenCLArray& getEnergyBuffer() {
return *energyBuffer;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray& getEnergyParamDerivBuffer() {
return *energyParamDerivBuffer;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
......@@ -659,6 +665,27 @@ public:
std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations;
}
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const std::vector<std::string>& getEnergyParamDerivNames() const {
return energyParamDerivNames;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std::map<std::string, double>& getEnergyParamDerivWorkspace() {
return energyParamDerivWorkspace;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void addEnergyParameterDerivative(const std::string& param);
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
......@@ -725,7 +752,10 @@ private:
OpenCLArray* forceBuffers;
OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer;
OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes;
......
......@@ -141,6 +141,12 @@ public:
* @param forces on exit, this contains the forces
*/
void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void getEnergyParameterDerivatives(ContextImpl& context, std::map<std::string, double>& derivs);
/**
* Get the current periodic box vectors.
*
......@@ -709,6 +715,7 @@ private:
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions;
double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy;
......@@ -801,13 +808,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
double cutoff;
bool hasInitializedKernels, needParameterGradient;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain;
std::vector<OpenCLParameterSet*> dValuedParam;
std::vector<OpenCLArray*> dValue0dParam;
OpenCLArray* longEnergyDerivs;
OpenCLArray* globals;
OpenCLArray* valueBuffers;
......@@ -953,6 +962,7 @@ public:
private:
int numGroups, numBonds;
bool needEnergyParamDerivs;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -1273,7 +1283,7 @@ public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofValues(NULL) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
}
~OpenCLIntegrateCustomStepKernel();
/**
......@@ -1338,8 +1348,11 @@ public:
private:
class ReorderListener;
class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
......@@ -1347,18 +1360,23 @@ private:
double energy;
float energyFloat;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
OpenCLArray* globalValues;
OpenCLArray* sumBuffer;
OpenCLArray* summedValue;
OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed;
OpenCLArray* perDofEnergyParamDerivs;
std::map<int, OpenCLArray*> savedForces;
std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<cl_float> localPerDofEnergyParamDerivsFloat;
std::vector<cl_double> localPerDofEnergyParamDerivsDouble;
std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/
void addArgument(const ParameterInfo& parameter);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
*
......@@ -287,6 +296,7 @@ private:
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
std::vector<std::string> energyParameterDerivatives;
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
}
}
std::string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
arguments.push_back(&data);
argTypes.push_back(type);
return "customArg"+context.intToString(arguments.size());
}
string OpenCLBondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void OpenCLBondedUtilities::addPrefixCode(const string& source) {
for (int i = 0; i < (int) prefixCode.size(); i++)
if (prefixCode[i] == source)
......@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
for (int i = 0; i < (int) arguments.size(); i++)
s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1);
if (energyParameterDerivatives.size() > 0)
s<<", __global mixed* restrict energyParamDerivs";
s<<") {\n";
s<<"mixed energy = 0;\n";
for (int i = 0; i < energyParameterDerivatives.size(); i++)
s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
for (int i = 0; i < setSize; i++) {
int force = set[i];
s<<createForceSource(i, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
}
s<<"energyBuffer[get_global_id(0)] += energy;\n";
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
s<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
s<<"}\n";
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
......@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
}
for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]);
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
}
}
for (int i = 0; i < (int) kernels.size(); i++) {
......
......@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") {
useDoublePrecision = false;
......@@ -435,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer;
if (energyBuffer != NULL)
delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
......@@ -455,15 +457,16 @@ void OpenCLContext::initialize() {
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
}
else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
}
if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
......@@ -475,7 +478,15 @@ void OpenCLContext::initialize() {
}
addAutoclearBuffer(*forceBuffers);
addAutoclearBuffer(*energyBuffer);
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = OpenCLArray::create<cl_double>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
else
energyParamDerivBuffer = OpenCLArray::create<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBufferSize*energyBuffer->getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) {
......@@ -1229,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation);
}
void OpenCLContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered.
for (int i = 0; i < energyParamDerivNames.size(); i++)
if (param == energyParamDerivNames[i])
return;
energyParamDerivNames.push_back(param);
}
struct OpenCLContext::WorkThread::ThreadData {
ThreadData(std::queue<OpenCLContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -162,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter);
}
string OpenCLNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size());
......@@ -591,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << arguments[i].getName();
}
}
if (energyParameterDerivatives.size() > 0)
args << ", __global mixed* restrict energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream loadLocal1;
for (int i = 0; i < (int) params.size(); i++) {
......@@ -641,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
map<string, string> defines;
if (useCutoff)
defines["USE_CUTOFF"] = "1";
......@@ -716,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
for (int i = 0; i < (int) arguments.size(); i++) {
kernel.setArg<cl::Memory>(index++, arguments[i].getMemory());
}
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
return kernel;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment