Commit 1f7866ad authored by peastman, committed by GitHub
Browse files

Merge pull request #1547 from peastman/paramderivs

Energy derivatives with respect to global parameters
parents 37787af9 7851bad8
...@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false; ...@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) { if (platformData.context != NULL) {
try { try {
...@@ -339,6 +339,8 @@ CudaContext::~CudaContext() { ...@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
delete force; delete force;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (integration != NULL) if (integration != NULL)
...@@ -390,6 +392,14 @@ void CudaContext::initialize() { ...@@ -390,6 +392,14 @@ void CudaContext::initialize() {
force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force"); force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize()); addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize()); addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = CudaArray::create<double>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
else
energyParamDerivBuffer = CudaArray::create<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex"); atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms); atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
...@@ -1311,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) { ...@@ -1311,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation); postComputations.push_back(computation);
} }
/**
 * Record that the derivative of the energy with respect to a parameter is needed.
 * A name that is already present in energyParamDerivNames is not added again.
 *
 * NOTE(review): initialize() sizes energyParamDerivBuffer from the length of this
 * list, so presumably every registration has to happen before it runs - confirm.
 *
 * @param param    the name of the parameter to register
 */
void CudaContext::addEnergyParameterDerivative(const string& param) {
    // Scan the existing names; stop as soon as a match is found.
    const int numRegistered = (int) energyParamDerivNames.size();
    bool alreadyRegistered = false;
    for (int slot = 0; slot < numRegistered && !alreadyRegistered; slot++)
        alreadyRegistered = (energyParamDerivNames[slot] == param);

    // Only a name that was not found gets appended.
    if (!alreadyRegistered)
        energyParamDerivNames.push_back(param);
}
struct CudaContext::WorkThread::ThreadData { struct CudaContext::WorkThread::ThreadData {
ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished, ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) : pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -146,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) { ...@@ -146,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter); arguments.push_back(parameter);
} }
/**
 * Register that the interaction kernel computes the derivative of the energy with
 * respect to a parameter, and report the name of the variable that accumulates it.
 *
 * The parameter is stored in this object's own list (once per distinct name) and is
 * also forwarded to the context on every call.
 *
 * @param param    the name of the parameter
 * @return "energyParamDeriv" followed by the parameter's position in this object's list
 */
string CudaNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
    // Find the position of the parameter, or the end of the list if it is new.
    const int numKnown = (int) energyParameterDerivatives.size();
    int position = 0;
    while (position < numKnown && !(param == energyParameterDerivatives[position]))
        position++;

    // A new parameter is appended, so 'position' is its index in either case.
    if (position == numKnown)
        energyParameterDerivatives.push_back(param);

    // The context keeps its own list of parameter names, so it is told as well.
    context.addEnergyParameterDerivative(param);
    return string("energyParamDeriv")+context.intToString(position);
}
void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) { void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) { if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size()); bool sameExclusions = (exclusionList.size() == atomExclusions.size());
...@@ -308,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -308,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs.push_back(&parameters[i].getMemory()); forceArgs.push_back(&parameters[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory()); forceArgs.push_back(&arguments[i].getMemory());
if (energyParameterDerivatives.size() > 0)
forceArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
if (useCutoff) { if (useCutoff) {
findBlockBoundsArgs.push_back(&numAtoms); findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer()); findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
...@@ -515,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -515,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
args << "* __restrict__ "; args << "* __restrict__ ";
args << arguments[i].getName(); args << arguments[i].getName();
} }
if (energyParameterDerivatives.size() > 0)
args << ", mixed* __restrict__ energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1; stringstream load1;
...@@ -623,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -623,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
} }
} }
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
stringstream shuffleWarpData; stringstream shuffleWarpData;
if(useShuffle) { if(useShuffle) {
......
...@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g ...@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g
const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) { EXTRA_ARGS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
COMPUTE_FORCE COMPUTE_FORCE
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
/** /**
......
...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -266,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -309,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -353,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos++; pos++;
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives // Load the derivatives
...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc ...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY COMPUTE_ENERGY
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
const real scale = RECIP((real) 0x100000000); const real scale = RECIP((real) 0x100000000);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index]; real4 pos = posq[index];
...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ ...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000);
} }
SAVE_PARAM_DERIVS
} }
...@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE COMPUTE_VALUE
} }
value += tempValue1; value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results. // Write results.
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset1 = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (value*0x100000000))); atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; unsigned int offset2 = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000))); atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
} }
} }
...@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -244,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
...@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -276,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
} }
value += tempValue1; value += tempValue1;
localData[tbx+tj].value += tempValue2; localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -285,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results. // Write results.
atomicAdd(&global_value[atom1], static_cast<unsigned long long>((long long) (value*0x100000000))); unsigned int offset1 = atom1;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x]; unsigned int atom2 = atomIndices[threadIdx.x];
#else #else
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) if (atom2 < PADDED_NUM_ATOMS) {
atomicAdd(&global_value[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000))); unsigned int offset2 = atom2;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
} }
pos++; pos++;
} }
......
...@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu ...@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value // Load the pairwise value
real sum = valueBuffers[index]/(real) 0x100000000; real sum = valueBuffers[index]/(real) 0x100000000;
REDUCE_PARAM0_DERIV
// Now calculate other values // Now calculate other values
......
...@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) { ...@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta, extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
const real energy, mixed* __restrict__ energyParamDerivs
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
......
...@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) { ...@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if (!isExcluded) { if (!isExcluded) {
#endif #endif
real tempForce = 0; real tempForce = 0;
COMPUTE_FORCE real switchValue = 1, switchDeriv = 0;
#if USE_SWITCH #if USE_SWITCH
if (r > SWITCH_CUTOFF) { if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF; real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5)); switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5)); switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
} }
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
#endif #endif
dEdR += tempForce*invR; dEdR += tempForce*invR;
} }
...@@ -113,6 +113,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -113,6 +113,7 @@ extern "C" __global__ void computeNonbonded(
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
mixed energy = 0; mixed energy = 0;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle // used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE #ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
...@@ -175,6 +176,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -175,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1)); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
const real interactionScale = 0.5f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
...@@ -243,6 +245,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -243,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1)); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
...@@ -448,6 +451,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -448,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
...@@ -518,6 +522,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -518,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif #endif
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
energy += tempEnergy; energy += tempEnergy;
#ifdef INCLUDE_FORCES #ifdef INCLUDE_FORCES
...@@ -586,4 +591,5 @@ extern "C" __global__ void computeNonbonded( ...@@ -586,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
#ifdef INCLUDE_ENERGY #ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif #endif
SAVE_DERIVATIVES
} }
\ No newline at end of file
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -100,6 +100,15 @@ public: ...@@ -100,6 +100,15 @@ public:
* refer to it by this name. * refer to it by this name.
*/ */
std::string addArgument(cl::Memory& data, const std::string& type); std::string addArgument(cl::Memory& data, const std::string& type);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/** /**
* Add some OpenCL code that should be included in the program, before the start of the kernel. * Add some OpenCL code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel. * This can be used, for example, to define functions that will be called by the kernel.
...@@ -137,6 +146,7 @@ private: ...@@ -137,6 +146,7 @@ private:
std::vector<OpenCLArray*> atomIndices; std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray*> bufferIndices; std::vector<OpenCLArray*> bufferIndices;
std::vector<std::string> prefixCode; std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups; int numForceBuffers, maxBonds, allGroups;
bool hasInitializedKernels; bool hasInitializedKernels;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -264,6 +264,12 @@ public: ...@@ -264,6 +264,12 @@ public:
OpenCLArray& getEnergyBuffer() { OpenCLArray& getEnergyBuffer() {
return *energyBuffer; return *energyBuffer;
} }
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray& getEnergyParamDerivBuffer() {
    // The member is a pointer; callers receive a reference to the array it points to.
    // NOTE(review): no null check is made here - confirm the buffer is always
    // allocated before this accessor is used.
    OpenCLArray& derivBuffer = *energyParamDerivBuffer;
    return derivBuffer;
}
/** /**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device. * Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class. * This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
...@@ -659,6 +665,27 @@ public: ...@@ -659,6 +665,27 @@ public:
std::vector<ForcePostComputation*>& getPostComputations() { std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations; return postComputations;
} }
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const std::vector<std::string>& getEnergyParamDerivNames() const {
    // Expose the stored list by const reference; nothing is copied.
    const std::vector<std::string>& names = energyParamDerivNames;
    return names;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std::map<std::string, double>& getEnergyParamDerivWorkspace() {
    // Returned by non-const reference so callers can modify the stored map in place.
    std::map<std::string, double>& workspace = energyParamDerivWorkspace;
    return workspace;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void addEnergyParameterDerivative(const std::string& param);
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
...@@ -725,7 +752,10 @@ private: ...@@ -725,7 +752,10 @@ private:
OpenCLArray* forceBuffers; OpenCLArray* forceBuffers;
OpenCLArray* longForceBuffer; OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer; OpenCLArray* energyBuffer;
OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice; OpenCLArray* atomIndexDevice;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers; std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes; std::vector<int> autoclearBufferSizes;
......
...@@ -141,6 +141,12 @@ public: ...@@ -141,6 +141,12 @@ public:
* @param forces on exit, this contains the forces * @param forces on exit, this contains the forces
*/ */
void getForces(ContextImpl& context, std::vector<Vec3>& forces); void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void getEnergyParameterDerivatives(ContextImpl& context, std::map<std::string, double>& derivs);
/** /**
* Get the current periodic box vectors. * Get the current periodic box vectors.
* *
...@@ -709,6 +715,7 @@ private: ...@@ -709,6 +715,7 @@ private:
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel; bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks; int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy; CustomNonbondedForce* forceCopy;
...@@ -801,13 +808,15 @@ public: ...@@ -801,13 +808,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs; OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain; OpenCLParameterSet* energyDerivChain;
std::vector<OpenCLParameterSet*> dValuedParam;
std::vector<OpenCLArray*> dValue0dParam;
OpenCLArray* longEnergyDerivs; OpenCLArray* longEnergyDerivs;
OpenCLArray* globals; OpenCLArray* globals;
OpenCLArray* valueBuffers; OpenCLArray* valueBuffers;
...@@ -953,6 +962,7 @@ public: ...@@ -953,6 +962,7 @@ public:
private: private:
int numGroups, numBonds; int numGroups, numBonds;
bool needEnergyParamDerivs;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
...@@ -1273,7 +1283,7 @@ public: ...@@ -1273,7 +1283,7 @@ public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl), OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofValues(NULL) { randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
} }
~OpenCLIntegrateCustomStepKernel(); ~OpenCLIntegrateCustomStepKernel();
/** /**
...@@ -1338,8 +1348,11 @@ public: ...@@ -1338,8 +1348,11 @@ public:
private: private:
class ReorderListener; class ReorderListener;
class GlobalTarget; class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target); void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
...@@ -1347,18 +1360,23 @@ private: ...@@ -1347,18 +1360,23 @@ private:
double energy; double energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
OpenCLArray* globalValues; OpenCLArray* globalValues;
OpenCLArray* sumBuffer; OpenCLArray* sumBuffer;
OpenCLArray* summedValue; OpenCLArray* summedValue;
OpenCLArray* uniformRandoms; OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed; OpenCLArray* randomSeed;
OpenCLArray* perDofEnergyParamDerivs;
std::map<int, OpenCLArray*> savedForces; std::map<int, OpenCLArray*> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues; OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat; mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble; mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<cl_float> localPerDofEnergyParamDerivsFloat;
std::vector<cl_double> localPerDofEnergyParamDerivsDouble;
std::vector<float> globalValuesFloat; std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble; std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables; std::vector<double> initialGlobalVariables;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -88,6 +88,15 @@ public: ...@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel. * Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/ */
void addArgument(const ParameterInfo& parameter); void addArgument(const ParameterInfo& parameter);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/** /**
* Specify the list of exclusions that an interaction outside the default kernel will depend on. * Specify the list of exclusions that an interaction outside the default kernel will depend on.
* *
...@@ -287,6 +296,7 @@ private: ...@@ -287,6 +296,7 @@ private:
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
std::vector<std::string> energyParameterDerivatives;
std::map<int, double> groupCutoff; std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co ...@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
} }
} }
std::string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) { string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
arguments.push_back(&data); arguments.push_back(&data);
argTypes.push_back(type); argTypes.push_back(type);
return "customArg"+context.intToString(arguments.size()); return "customArg"+context.intToString(arguments.size());
} }
/**
 * Record that the bonded kernel will accumulate the derivative of the energy
 * with respect to a parameter, and return the name of the kernel variable
 * that interaction code should add its contributions to.
 */
string OpenCLBondedUtilities::addEnergyParameterDerivative(const string& param) {
    // Search the names recorded so far; the position of the match (or of the
    // newly appended entry) becomes the numeric suffix of the variable name.
    const int numKnown = (int) energyParameterDerivatives.size();
    int slot = 0;
    while (slot < numKnown && energyParameterDerivatives[slot] != param)
        slot++;
    if (slot == numKnown)
        energyParameterDerivatives.push_back(param);
    // The context is told about the parameter on every call, whether or not
    // it was already present in this object's list.
    context.addEnergyParameterDerivative(param);
    return string("energyParamDeriv")+context.intToString(slot);
}
void OpenCLBondedUtilities::addPrefixCode(const string& source) { void OpenCLBondedUtilities::addPrefixCode(const string& source) {
for (int i = 0; i < (int) prefixCode.size(); i++) for (int i = 0; i < (int) prefixCode.size(); i++)
if (prefixCode[i] == source) if (prefixCode[i] == source)
...@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
} }
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1); s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1);
if (energyParameterDerivatives.size() > 0)
s<<", __global mixed* restrict energyParamDerivs";
s<<") {\n"; s<<") {\n";
s<<"mixed energy = 0;\n"; s<<"mixed energy = 0;\n";
for (int i = 0; i < energyParameterDerivatives.size(); i++)
s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
for (int i = 0; i < setSize; i++) { for (int i = 0; i < setSize; i++) {
int force = set[i]; int force = set[i];
s<<createForceSource(i, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]); s<<createForceSource(i, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
} }
s<<"energyBuffer[get_global_id(0)] += energy;\n"; s<<"energyBuffer[get_global_id(0)] += energy;\n";
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
s<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
s<<"}\n"; s<<"}\n";
map<string, string> defines; map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
...@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) { ...@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
} }
for (int j = 0; j < (int) arguments.size(); j++) for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]); kernel.setArg<cl::Memory>(index++, *arguments[j]);
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
} }
} }
for (int i = 0; i < (int) kernels.size(); i++) { for (int i = 0; i < (int) kernels.size(); i++) {
......
...@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") { if (precision == "single") {
useDoublePrecision = false; useDoublePrecision = false;
...@@ -435,6 +435,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -435,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer; delete longForceBuffer;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (integration != NULL) if (integration != NULL)
...@@ -455,15 +457,16 @@ void OpenCLContext::initialize() { ...@@ -455,15 +457,16 @@ void OpenCLContext::initialize() {
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers()); numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (int i = 0; i < (int) forces.size(); i++) for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers()); numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
} }
else { else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers"); forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force"); force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer"); energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
} }
if (supports64BitGlobalAtomics) { if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer"); longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
...@@ -475,7 +478,15 @@ void OpenCLContext::initialize() { ...@@ -475,7 +478,15 @@ void OpenCLContext::initialize() {
} }
addAutoclearBuffer(*forceBuffers); addAutoclearBuffer(*forceBuffers);
addAutoclearBuffer(*energyBuffer); addAutoclearBuffer(*energyBuffer);
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBuffer->getSize()*energyBuffer->getElementSize()); int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = OpenCLArray::create<cl_double>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
else
energyParamDerivBuffer = OpenCLArray::create<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBufferSize*energyBuffer->getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
...@@ -1229,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) { ...@@ -1229,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation); postComputations.push_back(computation);
} }
/**
 * Add a parameter name to the list of parameters for which energy
 * derivatives are requested. A name that is already in the list is not
 * added a second time.
 */
void OpenCLContext::addEnergyParameterDerivative(const string& param) {
    // Scan the registered names, stopping as soon as a match is found.
    bool alreadyRegistered = false;
    const int numNames = (int) energyParamDerivNames.size();
    for (int n = 0; n < numNames && !alreadyRegistered; n++)
        alreadyRegistered = (energyParamDerivNames[n] == param);
    if (!alreadyRegistered)
        energyParamDerivNames.push_back(param);
}
struct OpenCLContext::WorkThread::ThreadData { struct OpenCLContext::WorkThread::ThreadData {
ThreadData(std::queue<OpenCLContext::WorkTask*>& tasks, bool& waiting, bool& finished, ThreadData(std::queue<OpenCLContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) : pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -162,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) { ...@@ -162,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter); arguments.push_back(parameter);
} }
/**
 * Record that the nonbonded interaction kernel will accumulate the derivative
 * of the energy with respect to a parameter, and return the name of the
 * kernel variable that interaction code should add its contributions to.
 */
string OpenCLNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
    // Find the parameter's position in the local list, appending it if absent.
    // That position is used as the numeric suffix of the returned variable name.
    const int total = (int) energyParameterDerivatives.size();
    int position = total;
    for (int k = 0; k < total; k++) {
        if (energyParameterDerivatives[k] == param) {
            position = k;
            break;
        }
    }
    if (position == total)
        energyParameterDerivatives.push_back(param);
    // The context is told about the parameter on every call, whether or not
    // it was already present in this object's list.
    context.addEnergyParameterDerivative(param);
    return string("energyParamDeriv")+context.intToString(position);
}
void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) { void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) { if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size()); bool sameExclusions = (exclusionList.size() == atomExclusions.size());
...@@ -591,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -591,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << arguments[i].getName(); args << arguments[i].getName();
} }
} }
if (energyParameterDerivatives.size() > 0)
args << ", __global mixed* restrict energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream loadLocal1; stringstream loadLocal1;
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
...@@ -641,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -641,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
} }
} }
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
map<string, string> defines; map<string, string> defines;
if (useCutoff) if (useCutoff)
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
...@@ -716,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -716,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
for (int i = 0; i < (int) arguments.size(); i++) { for (int i = 0; i < (int) arguments.size(); i++) {
kernel.setArg<cl::Memory>(index++, arguments[i].getMemory()); kernel.setArg<cl::Memory>(index++, arguments[i].getMemory());
} }
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
return kernel; return kernel;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment