Commit a381a3ab authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 5ecc8e00 1f7866ad
......@@ -73,6 +73,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
COMPUTE_VALUE
}
value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
......@@ -121,6 +122,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -133,11 +136,13 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (value*0x100000000)));
unsigned int offset1 = x*TILE_SIZE + tgx;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
if (x != y) {
offset = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
unsigned int offset2 = y*TILE_SIZE + tgx;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
}
......@@ -146,6 +151,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
......@@ -167,16 +174,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......@@ -199,7 +202,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......@@ -246,6 +249,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj = (tj + 1) & (TILE_SIZE - 1);
}
......@@ -278,6 +283,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
}
value += tempValue1;
localData[tbx+tj].value += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -287,14 +294,19 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Write results.
atomicAdd(&global_value[atom1], static_cast<unsigned long long>((long long) (value*0x100000000)));
unsigned int offset1 = atom1;
atomicAdd(&global_value[offset1], static_cast<unsigned long long>((long long) (value*0x100000000)));
STORE_PARAM_DERIVS1
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x];
#else
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
if (atom2 < PADDED_NUM_ATOMS)
atomicAdd(&global_value[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2;
atomicAdd(&global_value[offset2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].value*0x100000000)));
STORE_PARAM_DERIVS2
}
}
pos++;
}
......
......@@ -8,6 +8,7 @@ extern "C" __global__ void computePerParticleValues(real4* posq, long long* valu
// Load the pairwise value
real sum = valueBuffers[index]/(real) 0x100000000;
REDUCE_PARAM0_DERIV
// Now calculate other values
......
......@@ -33,7 +33,8 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
const real energy, mixed* __restrict__ energyParamDerivs
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
......
......@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if (!isExcluded) {
#endif
real tempForce = 0;
COMPUTE_FORCE
real switchValue = 1, switchDeriv = 0;
#if USE_SWITCH
if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
}
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
}
#endif
dEdR += tempForce*invR;
}
......@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
......@@ -225,16 +227,12 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......@@ -257,7 +255,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
......@@ -580,16 +580,12 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......@@ -612,7 +608,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......
......@@ -113,11 +113,14 @@ extern "C" __global__ void computeNonbonded(
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
mixed energy = 0;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
#endif
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
......@@ -173,6 +176,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 0.5f;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -241,6 +245,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -309,8 +314,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long long)numTileIndices/totalWarps : warp*(long long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long long)numTileIndices/totalWarps : (warp+1)*(long long)numTiles/totalWarps);
#else
......@@ -335,16 +343,12 @@ extern "C" __global__ void computeNonbonded(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
{
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
......@@ -367,7 +371,7 @@ extern "C" __global__ void computeNonbonded(
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
}
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
// Load atom data for this tile.
......@@ -447,6 +451,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -517,6 +522,7 @@ extern "C" __global__ void computeNonbonded(
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
......@@ -585,4 +591,5 @@ extern "C" __global__ void computeNonbonded(
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
SAVE_DERIVATIVES
}
\ No newline at end of file
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -100,6 +100,15 @@ public:
* refer to it by this name.
*/
std::string addArgument(cl::Memory& data, const std::string& type);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Add some OpenCL code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
......@@ -137,6 +146,7 @@ private:
std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray*> bufferIndices;
std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
int numForceBuffers, maxBonds, allGroups;
bool hasInitializedKernels;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -264,6 +264,12 @@ public:
OpenCLArray& getEnergyBuffer() {
return *energyBuffer;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
OpenCLArray& getEnergyParamDerivBuffer() {
return *energyParamDerivBuffer;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
......@@ -408,6 +414,18 @@ public:
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool getForcesValid() const {
return forcesValid;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void setForcesValid(bool valid) {
forcesValid = valid;
}
/**
* Get the number of atoms.
*/
......@@ -647,6 +665,27 @@ public:
std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations;
}
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const std::vector<std::string>& getEnergyParamDerivNames() const {
return energyParamDerivNames;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std::map<std::string, double>& getEnergyParamDerivWorkspace() {
return energyParamDerivWorkspace;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void addEnergyParameterDerivative(const std::string& param);
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
......@@ -684,7 +723,7 @@ private:
int numThreadBlocks;
int numForceBuffers;
int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions;
......@@ -713,7 +752,10 @@ private:
OpenCLArray* forceBuffers;
OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer;
OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes;
......
......@@ -141,6 +141,12 @@ public:
* @param forces on exit, this contains the forces
*/
void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void getEnergyParameterDerivatives(ContextImpl& context, std::map<std::string, double>& derivs);
/**
* Get the current periodic box vectors.
*
......@@ -709,6 +715,7 @@ private:
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions;
double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy;
......@@ -801,13 +808,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
double cutoff;
bool hasInitializedKernels, needParameterGradient;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain;
std::vector<OpenCLParameterSet*> dValuedParam;
std::vector<OpenCLArray*> dValue0dParam;
OpenCLArray* longEnergyDerivs;
OpenCLArray* globals;
OpenCLArray* valueBuffers;
......@@ -953,6 +962,7 @@ public:
private:
int numGroups, numBonds;
bool needEnergyParamDerivs;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -1339,7 +1349,7 @@ public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofValues(NULL) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
}
~OpenCLIntegrateCustomStepKernel();
/**
......@@ -1404,8 +1414,11 @@ public:
private:
class ReorderListener;
class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
......@@ -1413,18 +1426,23 @@ private:
double energy;
float energyFloat;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
OpenCLArray* globalValues;
OpenCLArray* sumBuffer;
OpenCLArray* summedValue;
OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed;
OpenCLArray* perDofEnergyParamDerivs;
std::map<int, OpenCLArray*> savedForces;
std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<cl_float> localPerDofEnergyParamDerivsFloat;
std::vector<cl_double> localPerDofEnergyParamDerivsDouble;
std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/
void addArgument(const ParameterInfo& parameter);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
*
......@@ -281,9 +290,13 @@ private:
OpenCLArray* oldPositions;
OpenCLArray* rebuildNeighborList;
OpenCLSort* blockSorter;
cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer;
int* pinnedCountMemory;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
std::vector<std::string> energyParameterDerivatives;
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -56,12 +56,25 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
}
}
std::string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
arguments.push_back(&data);
argTypes.push_back(type);
return "customArg"+context.intToString(arguments.size());
}
string OpenCLBondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void OpenCLBondedUtilities::addPrefixCode(const string& source) {
for (int i = 0; i < (int) prefixCode.size(); i++)
if (prefixCode[i] == source)
......@@ -190,13 +203,23 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
for (int i = 0; i < (int) arguments.size(); i++)
s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1);
if (energyParameterDerivatives.size() > 0)
s<<", __global mixed* restrict energyParamDerivs";
s<<") {\n";
s<<"mixed energy = 0;\n";
for (int i = 0; i < energyParameterDerivatives.size(); i++)
s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
for (int i = 0; i < setSize; i++) {
int force = set[i];
s<<createForceSource(i, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
}
s<<"energyBuffer[get_global_id(0)] += energy;\n";
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
s<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
s<<"}\n";
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
......@@ -274,6 +297,8 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
}
for (int j = 0; j < (int) arguments.size(); j++)
kernel.setArg<cl::Memory>(index++, *arguments[j]);
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
}
}
for (int i = 0; i < (int) kernels.size(); i++) {
......
......@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") {
useDoublePrecision = false;
......@@ -179,10 +179,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["WORK_GROUP_SIZE"] = intToString(ThreadBlockSize);
if (platformVendor.size() >= 5 && platformVendor.substr(0, 5) == "Intel")
defaultOptimizationOptions = "";
else if (platformVendor == "Apple")
defaultOptimizationOptions = "-cl-mad-enable -cl-no-signed-zeros";
else
defaultOptimizationOptions = "-cl-fast-relaxed-math";
defaultOptimizationOptions = "-cl-mad-enable -cl-no-signed-zeros";
supports64BitGlobalAtomics = (device.getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_int64_base_atomics") != string::npos);
supportsDoublePrecision = (device.getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_fp64") != string::npos);
if ((useDoublePrecision || useMixedPrecision) && !supportsDoublePrecision)
......@@ -437,6 +435,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer;
if (energyBuffer != NULL)
delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
......@@ -457,15 +457,16 @@ void OpenCLContext::initialize() {
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
int energyBufferSize = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
}
else {
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_double>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
}
if (supports64BitGlobalAtomics) {
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
......@@ -477,7 +478,15 @@ void OpenCLContext::initialize() {
}
addAutoclearBuffer(*forceBuffers);
addAutoclearBuffer(*energyBuffer);
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = OpenCLArray::create<cl_double>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
else
energyParamDerivBuffer = OpenCLArray::create<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBufferSize*energyBuffer->getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) {
......@@ -1052,7 +1061,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
else
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
nonbonded->updateNeighborListSize();
}
template <class Real, class Real4, class Mixed, class Mixed4>
......@@ -1232,6 +1240,15 @@ void OpenCLContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation);
}
void OpenCLContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered.
for (int i = 0; i < energyParamDerivNames.size(); i++)
if (param == energyParamDerivNames[i])
return;
energyParamDerivNames.push_back(param);
}
struct OpenCLContext::WorkThread::ThreadData {
ThreadData(std::queue<OpenCLContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
......@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n";
for (int j = 0; j < 4; j++)
......@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
out << "float4 c[16];\n";
for (int j = 0; j < 16; j++)
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), pinnedCountMemory(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks and force buffers to use.
deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
......@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
}
}
pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(int));
pinnedCountMemory = (int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int));
}
OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
......@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete rebuildNeighborList;
if (blockSorter != NULL)
delete blockSorter;
if (pinnedCountBuffer != NULL)
delete pinnedCountBuffer;
}
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
......@@ -158,6 +162,19 @@ void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter);
}
string OpenCLNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size());
......@@ -357,8 +374,6 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true;
bool rebuild = false;
do {
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
......@@ -367,10 +382,8 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while (rebuild);
lastCutoff = kernels.cutoffDistance;
context.getQueue().enqueueReadBuffer(interactionCount->getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent);
}
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
......@@ -385,20 +398,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs(context, kernel, 9);
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
if (useCutoff && numTiles > 0) {
downloadCountEvent.wait();
updateNeighborListSize();
}
}
bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) interactingTiles->getSize())
if (pinnedCountMemory[0] <= (unsigned int) interactingTiles->getSize())
return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int maxTiles = (int) (1.2*pinnedInteractionCount[0]);
int maxTiles = (int) (1.2*pinnedCountMemory[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
......@@ -430,6 +445,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
}
forceRebuildNeighborList = true;
context.setForcesValid(false);
return true;
}
......@@ -588,6 +604,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << arguments[i].getName();
}
}
if (energyParameterDerivatives.size() > 0)
args << ", __global mixed* restrict energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream loadLocal1;
for (int i = 0; i < (int) params.size(); i++) {
......@@ -638,6 +656,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[get_global_id(0)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
map<string, string> defines;
if (useCutoff)
defines["USE_CUTOFF"] = "1";
......@@ -713,5 +743,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
for (int i = 0; i < (int) arguments.size(); i++) {
kernel.setArg<cl::Memory>(index++, arguments[i].getMemory());
}
if (energyParameterDerivatives.size() > 0)
kernel.setArg<cl::Memory>(index++, context.getEnergyParamDerivBuffer().getDeviceBuffer());
return kernel;
}
......@@ -30,6 +30,10 @@
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
}
else {
#if HAS_LENNARD_JONES
......
......@@ -116,10 +116,12 @@ __kernel void computeGroupForces(__global long* restrict groupForce, __global mi
__global const int* restrict bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
}
energyBuffer[get_global_id(0)] += energy;
SAVE_PARAM_DERIVS
}
/**
......
......@@ -12,6 +12,7 @@
__kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq
PARAMETER_ARGUMENTS) {
mixed energy = 0;
INIT_PARAM_DERIVS
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Reduce the derivatives
......@@ -27,4 +28,5 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] += energy;
SAVE_PARAM_DERIVS
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment