Commit b11af401 authored by peastman's avatar peastman
Browse files

Merge pull request #1140 from peastman/energy

Changes to generating nonbonded kernels
parents a2d5b985 a47300e6
......@@ -138,8 +138,12 @@ public:
void prepareInteractions(int forceGroups);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void computeInteractions(int forceGroups);
void computeInteractions(int forceGroups, bool includeForces, bool includeEnergy);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
......@@ -235,8 +239,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
......@@ -282,7 +288,8 @@ class CudaNonbondedUtilities::KernelSet {
public:
bool hasForces;
double cutoffDistance;
CUfunction forceKernel;
std::string source;
CUfunction forceKernel, energyKernel, forceEnergyKernel;
CUfunction findBlockBoundsKernel;
CUfunction sortBoxDataKernel;
CUfunction findInteractingBlocksKernel;
......
......@@ -105,7 +105,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (vector<CudaContext::ForcePostComputation*>::iterator iter = cu.getPostComputations().begin(); iter != cu.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
......
......@@ -388,12 +388,15 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff = kernels.cutoffDistance;
}
void CudaNonbondedUtilities::computeInteractions(int forceGroups) {
void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
context.executeKernel(kernels.forceKernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
CUfunction& kernel = (includeForces ? (includeEnergy ? kernels.forceEnergyKernel : kernels.forceKernel) : kernels.energyKernel);
if (kernel == NULL)
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
}
......@@ -454,8 +457,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
kernels.source = source;
kernels.forceKernel = kernels.energyKernel = kernels.forceEnergyKernel = NULL;
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
......@@ -478,7 +481,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels[groups] = kernels;
}
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy) {
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -654,6 +657,10 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["USE_SYMMETRIC"] = "1";
if (useShuffle)
defines["ENABLE_SHUFFLE"] = "1";
if (includeForces)
defines["INCLUDE_FORCES"] = "1";
if (includeEnergy)
defines["INCLUDE_ENERGY"] = "1";
defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize);
double maxCutoff = 0.0;
for (int i = 0; i < 32; i++) {
......
......@@ -112,7 +112,7 @@ extern "C" __global__ void computeNonbonded(
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f;
mixed energy = 0;
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
......@@ -175,6 +175,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.x -= delta.x*dEdR;
force.y -= delta.y*dEdR;
......@@ -184,6 +185,7 @@ extern "C" __global__ void computeNonbonded(
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
......@@ -241,6 +243,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -270,11 +273,12 @@ extern "C" __global__ void computeNonbonded(
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif // end USE_SYMMETRIC
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
......@@ -282,6 +286,7 @@ extern "C" __global__ void computeNonbonded(
}
const unsigned int offset = y*TILE_SIZE + tgx;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
......@@ -290,13 +295,16 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
......@@ -441,6 +449,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -472,6 +481,7 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
......@@ -509,6 +519,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -540,12 +551,14 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
......@@ -565,8 +578,11 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
}
#endif
}
pos++;
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += (real) energy;
#endif
}
\ No newline at end of file
......@@ -150,8 +150,12 @@ public:
void prepareInteractions(int forceGroups);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void computeInteractions(int forceGroups);
void computeInteractions(int forceGroups, bool includeForces, bool includeEnergy);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
......@@ -247,8 +251,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
......@@ -294,7 +300,8 @@ class OpenCLNonbondedUtilities::KernelSet {
public:
bool hasForces;
double cutoffDistance;
cl::Kernel forceKernel;
std::string source;
cl::Kernel forceKernel, energyKernel, forceEnergyKernel;
cl::Kernel findBlockBoundsKernel;
cl::Kernel sortBoxDataKernel;
cl::Kernel findInteractingBlocksKernel;
......
......@@ -128,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
......
......@@ -373,14 +373,17 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff = kernels.cutoffDistance;
}
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups) {
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
cl::Kernel& kernel = (includeForces ? (includeEnergy ? kernels.forceEnergyKernel : kernels.forceKernel) : kernels.energyKernel);
if (*reinterpret_cast<cl_kernel*>(&kernel) == NULL)
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
if (useCutoff)
setPeriodicBoxArgs(context, kernels.forceKernel, 9);
context.executeKernel(kernels.forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
setPeriodicBoxArgs(context, kernel, 9);
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
}
......@@ -406,12 +409,25 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
iter->second.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
iter->second.forceKernel.setArg<cl_uint>(14, maxTiles);
iter->second.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
kernels.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.forceKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) {
kernels.energyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.energyKernel.setArg<cl_uint>(14, maxTiles);
kernels.energyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) {
kernels.forceEnergyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.forceEnergyKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceEnergyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
}
forceRebuildNeighborList = true;
return true;
......@@ -432,10 +448,21 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
// We are using a cutoff, and the kernels have already been created.
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
iter->second.forceKernel.setArg<cl_uint>(5, startTileIndex);
iter->second.forceKernel.setArg<cl_uint>(6, numTiles);
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
kernels.forceKernel.setArg<cl_uint>(5, startTileIndex);
kernels.forceKernel.setArg<cl_uint>(6, numTiles);
}
if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) {
kernels.energyKernel.setArg<cl_uint>(5, startTileIndex);
kernels.energyKernel.setArg<cl_uint>(6, numTiles);
}
if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) {
kernels.forceEnergyKernel.setArg<cl_uint>(5, startTileIndex);
kernels.forceEnergyKernel.setArg<cl_uint>(6, numTiles);
}
kernels.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
kernels.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
}
forceRebuildNeighborList = true;
}
......@@ -453,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
kernels.source = source;
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
......@@ -524,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels[groups] = kernels;
}
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy) {
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -623,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_SYMMETRIC"] = "1";
if (useCutoff && context.getSIMDWidth() < 32)
defines["PRUNE_BY_CUTOFF"] = "1";
if (includeForces)
defines["INCLUDE_FORCES"] = "1";
if (includeEnergy)
defines["INCLUDE_ENERGY"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
double maxCutoff = 0.0;
for (int i = 0; i < 32; i++) {
......
......@@ -34,7 +34,7 @@ __kernel void computeNonbonded(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
real energy = 0;
mixed energy = 0;
__local AtomData localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
......@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
#else
force.xyz -= dEdR1.xyz;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
......@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
......@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
......@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[get_local_id(0)];
#else
......@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (atom2 < PADDED_NUM_ATOMS)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
pos++;
}
energyBuffer[get_global_id(0)] += energy;
#ifdef INCLUDE_ENERGY
energyBuffer[get_global_id(0)] += (real) energy;
#endif
}
......@@ -2407,7 +2407,7 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
&bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()};
cu.executeKernel(prepareKernel, prepareArgs, cu.getPaddedNumAtoms());
nonbonded->prepareInteractions(1);
nonbonded->computeInteractions(1);
nonbonded->computeInteractions(1, includeForces, includeEnergy);
void* spreadArgs[] = {&cu.getForce().getDevicePointer(), &tempForces->getDevicePointer(), &bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()};
cu.executeKernel(spreadKernel, spreadArgs, cu.getPaddedNumAtoms());
tempPosq->copyTo(cu.getPosq());
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment