Commit a5e4de14 authored by Peter Eastman's avatar Peter Eastman
Browse files

Allow the neighbor list arrays to grow if the initial sizes are too small

parent d3f0d1f7
...@@ -71,8 +71,10 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -71,8 +71,10 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
} }
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) { void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) {
cl.reorderAtoms(); cl.reorderAtoms();
cl.getNonbondedUtilities().updateNeighborListSize();
}
cl.setComputeForceCount(cl.getComputeForceCount()+1); cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearAutoclearBuffers(); cl.clearAutoclearBuffers();
cl.getNonbondedUtilities().prepareInteractions(); cl.getNonbondedUtilities().prepareInteractions();
...@@ -1679,6 +1681,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1679,6 +1681,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
// These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then. // These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
hasCreatedKernels = true; hasCreatedKernels = true;
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
map<string, string> defines; map<string, string> defines;
if (nb.getForceBufferPerAtomBlock()) if (nb.getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
...@@ -1691,8 +1694,6 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1691,8 +1694,6 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtomBlocks()); defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtomBlocks());
if (nb.getUseCutoff())
defines["MAX_TILES"] = OpenCLExpressionUtilities::intToString(nb.getInteractingTiles().getSize());
string file = (cl.getSIMDWidth() == 32 ? OpenCLKernelSources::gbsaObc_nvidia : OpenCLKernelSources::gbsaObc_default); string file = (cl.getSIMDWidth() == 32 ? OpenCLKernelSources::gbsaObc_nvidia : OpenCLKernelSources::gbsaObc_default);
cl::Program program = cl.createProgram(file, defines); cl::Program program = cl.createProgram(file, defines);
int index = 0; int index = 0;
...@@ -1705,8 +1706,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1705,8 +1706,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed.
computeBornSumKernel.setArg<cl_uint>(index++, maxTiles);
if (cl.getSIMDWidth() == 32) if (cl.getSIMDWidth() == 32)
computeBornSumKernel.setArg<cl::Buffer>(index+2, nb.getInteractionFlags().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer());
} }
else else
computeBornSumKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2); computeBornSumKernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
...@@ -1722,8 +1725,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1722,8 +1725,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed.
force1Kernel.setArg<cl_uint>(index++, maxTiles);
if (cl.getSIMDWidth() == 32) if (cl.getSIMDWidth() == 32)
force1Kernel.setArg<cl::Buffer>(index+2, nb.getInteractionFlags().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer());
} }
else else
force1Kernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2); force1Kernel.setArg<cl_uint>(index++, cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2);
...@@ -1752,6 +1757,11 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1752,6 +1757,11 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel.setArg<mm_float4>(8, cl.getInvPeriodicBoxSize()); computeBornSumKernel.setArg<mm_float4>(8, cl.getInvPeriodicBoxSize());
force1Kernel.setArg<mm_float4>(9, cl.getPeriodicBoxSize()); force1Kernel.setArg<mm_float4>(9, cl.getPeriodicBoxSize());
force1Kernel.setArg<mm_float4>(10, cl.getInvPeriodicBoxSize()); force1Kernel.setArg<mm_float4>(10, cl.getInvPeriodicBoxSize());
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
computeBornSumKernel.setArg<cl_uint>(9, maxTiles);
force1Kernel.setArg<cl_uint>(11, maxTiles);
}
} }
int numTiles = cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2; int numTiles = cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2;
cl.executeKernel(computeBornSumKernel, numTiles*OpenCLContext::TileSize); cl.executeKernel(computeBornSumKernel, numTiles*OpenCLContext::TileSize);
...@@ -2389,6 +2399,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2389,6 +2399,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
if (!hasInitializedKernels) { if (!hasInitializedKernels) {
hasInitializedKernels = true; hasInitializedKernels = true;
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
valueBuffers = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "customGBValueBuffers"); valueBuffers = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "customGBValueBuffers");
cl.addAutoclearBuffer(valueBuffers->getDeviceBuffer(), valueBuffers->getSize()); cl.addAutoclearBuffer(valueBuffers->getDeviceBuffer(), valueBuffers->getSize());
cl.clearBuffer(*valueBuffers); cl.clearBuffer(*valueBuffers);
...@@ -2405,7 +2416,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2405,7 +2416,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 2; // Periodic box size arguments are set when the kernel is executed.
pairValueKernel.setArg<cl_uint>(index++, nb.getInteractingTiles().getSize()); pairValueKernel.setArg<cl_uint>(index++, maxTiles);
if (cl.getSIMDWidth() == 32) if (cl.getSIMDWidth() == 32)
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer());
} }
...@@ -2453,7 +2464,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2453,7 +2464,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 2; // Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel.setArg<cl_uint>(index++, nb.getInteractingTiles().getSize()); pairEnergyKernel.setArg<cl_uint>(index++, maxTiles);
if (cl.getSIMDWidth() == 32) if (cl.getSIMDWidth() == 32)
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer());
} }
...@@ -2530,6 +2541,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2530,6 +2541,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel.setArg<mm_float4>(11, cl.getInvPeriodicBoxSize()); pairValueKernel.setArg<mm_float4>(11, cl.getInvPeriodicBoxSize());
pairEnergyKernel.setArg<mm_float4>(11, cl.getPeriodicBoxSize()); pairEnergyKernel.setArg<mm_float4>(11, cl.getPeriodicBoxSize());
pairEnergyKernel.setArg<mm_float4>(12, cl.getInvPeriodicBoxSize()); pairEnergyKernel.setArg<mm_float4>(12, cl.getInvPeriodicBoxSize());
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
pairValueKernel.setArg<cl_uint>(12, maxTiles);
pairEnergyKernel.setArg<cl_uint>(13, maxTiles);
}
} }
int numTiles = cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2; int numTiles = cl.getNumAtomBlocks()*(cl.getNumAtomBlocks()+1)/2;
cl.executeKernel(pairValueKernel, numTiles*OpenCLContext::TileSize); cl.executeKernel(pairValueKernel, numTiles*OpenCLContext::TileSize);
......
...@@ -592,6 +592,7 @@ public: ...@@ -592,6 +592,7 @@ public:
private: private:
double prefactor; double prefactor;
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLArray<mm_float2>* params; OpenCLArray<mm_float2>* params;
OpenCLArray<cl_float>* bornSum; OpenCLArray<cl_float>* bornSum;
...@@ -632,6 +633,7 @@ public: ...@@ -632,6 +633,7 @@ public:
double execute(ContextImpl& context, bool includeForces, bool includeEnergy); double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private: private:
bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient;
int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
......
...@@ -212,9 +212,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -212,9 +212,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles"); interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
if (context.getSIMDWidth() == 32) if (context.getSIMDWidth() == 32)
interactionFlags = new OpenCLArray<cl_uint>(context, maxInteractingTiles, "interactionFlags"); interactionFlags = new OpenCLArray<cl_uint>(context, maxInteractingTiles, "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount"); interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true);
blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter"); blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox"); blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
interactionCount->set(0, 0);
interactionCount->upload();
} }
// Create kernels. // Create kernels.
...@@ -223,7 +225,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -223,7 +225,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (useCutoff) { if (useCutoff) {
map<string, string> defines; map<string, string> defines;
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["MAX_TILES"] = OpenCLExpressionUtilities::intToString(interactingTiles->getSize());
if (forceBufferPerAtomBlock) if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (usePeriodic) if (usePeriodic)
...@@ -242,6 +243,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -242,6 +243,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, context.getPosq().getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(7, context.getPosq().getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(8, interactingTiles->getSize());
if (context.getSIMDWidth() == 32) { if (context.getSIMDWidth() == 32) {
findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks"); findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff)); findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
...@@ -252,6 +254,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -252,6 +254,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(8, interactionCount->getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(8, interactionCount->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(cl_uint), NULL); findInteractionsWithinBlocksKernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(cl_uint), NULL);
findInteractionsWithinBlocksKernel.setArg<cl_uint>(10, interactingTiles->getSize());
} }
} }
} }
...@@ -296,6 +299,36 @@ void OpenCLNonbondedUtilities::computeInteractions() { ...@@ -296,6 +299,36 @@ void OpenCLNonbondedUtilities::computeInteractions() {
} }
} }
void OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return;
interactionCount->download();
if (interactionCount->get(0) <= interactingTiles->getSize())
return;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int newSize = (int) (1.2*interactionCount->get(0));
int numTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (newSize > numTiles)
newSize = numTiles;
delete interactingTiles;
interactingTiles = new OpenCLArray<mm_ushort2>(context, newSize, "interactingTiles");
forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(12, newSize);
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(8, newSize);
if (context.getSIMDWidth() == 32) {
delete interactionFlags;
interactionFlags = new OpenCLArray<cl_uint>(context, newSize, "interactionFlags");
forceKernel.setArg<cl::Buffer>(13, interactionFlags->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(4, interactingTiles->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl_uint>(10, newSize);
}
}
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const { cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const {
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source; replacements["COMPUTE_INTERACTION"] = source;
...@@ -403,8 +436,6 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -403,8 +436,6 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms()); defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
if (useCutoff)
defines["MAX_TILES"] = OpenCLExpressionUtilities::intToString(interactingTiles->getSize());
string file = (context.getSIMDWidth() == 32 ? OpenCLKernelSources::nonbonded_nvidia : OpenCLKernelSources::nonbonded_default); string file = (context.getSIMDWidth() == 32 ? OpenCLKernelSources::nonbonded_nvidia : OpenCLKernelSources::nonbonded_default);
cl::Program program = context.createProgram(context.replaceStrings(file, replacements), defines); cl::Program program = context.createProgram(context.replaceStrings(file, replacements), defines);
cl::Kernel kernel(program, "computeNonbonded"); cl::Kernel kernel(program, "computeNonbonded");
...@@ -424,6 +455,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -424,6 +455,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 2; // The periodic box size arguments are set when the kernel is executed.
kernel.setArg<cl_uint>(index++, interactingTiles->getSize());
if (context.getSIMDWidth() == 32) if (context.getSIMDWidth() == 32)
kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactionFlags->getDeviceBuffer());
} }
......
...@@ -123,10 +123,13 @@ public: ...@@ -123,10 +123,13 @@ public:
*/ */
void prepareInteractions(); void prepareInteractions();
/** /**
* Compute the nonbonded interactions. This will only be executed once after each call to * Compute the nonbonded interactions.
* prepareInteractions(). Additional calls return immediately without doing anything.
*/ */
void computeInteractions(); void computeInteractions();
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/
void updateNeighborListSize();
/** /**
* Get the array containing the center of each atom block. * Get the array containing the center of each atom block.
*/ */
......
...@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe ...@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*/ */
void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex, void storeInteractionData(__local ushort2* buffer, __local int* valid, __local short* sum, __local ushort2* temp, __local int* baseIndex,
__global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionCount, __global ushort2* interactingTiles, float cutoffSquared, float4 periodicBoxSize,
float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox) { float4 invPeriodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox, unsigned int maxTiles) {
// The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum. // The buffer is full, so we need to compact it and write out results. Start by doing a parallel prefix sum.
for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE) for (int i = get_local_id(0); i < BUFFER_SIZE; i += GROUP_SIZE)
...@@ -147,7 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s ...@@ -147,7 +147,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
*baseIndex = atom_add(interactionCount, numValid); *baseIndex = atom_add(interactionCount, numValid);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (*baseIndex+numValid <= MAX_TILES) if (*baseIndex+numValid <= maxTiles)
for (int i = get_local_id(0); i < numValid; i += GROUP_SIZE) for (int i = get_local_id(0); i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i] = temp[i]; interactingTiles[*baseIndex+i] = temp[i];
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -158,7 +158,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s ...@@ -158,7 +158,7 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter, __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global float4* posq) { __global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global float4* posq, unsigned int maxTiles) {
__local ushort2 buffer[BUFFER_SIZE]; __local ushort2 buffer[BUFFER_SIZE];
__local int valid[BUFFER_SIZE]; __local int valid[BUFFER_SIZE];
__local short sum[BUFFER_SIZE]; __local short sum[BUFFER_SIZE];
...@@ -210,14 +210,14 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -210,14 +210,14 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (bufferFull) { if (bufferFull) {
storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox); storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox, maxTiles);
valuesInBuffer = 0; valuesInBuffer = 0;
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
bufferFull = false; bufferFull = false;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
} }
storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox); storeInteractionData(buffer, valid, sum, temp, &globalIndex, interactionCount, interactingTiles, cutoffSquared, periodicBoxSize, invPeriodicBoxSize, posq, blockCenter, blockBoundingBox, maxTiles);
} }
/** /**
...@@ -225,7 +225,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox ...@@ -225,7 +225,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
* flags for which ones are interacting. * flags for which ones are interacting.
*/ */
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global ushort2* tiles, __global float4* blockCenter, __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* posq, __global ushort2* tiles, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags) { __global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags, unsigned int maxTiles) {
unsigned int totalWarps = get_global_size(0)/TILE_SIZE; unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
...@@ -233,7 +233,7 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB ...@@ -233,7 +233,7 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
unsigned int end = (warp+1)*numTiles/totalWarps; unsigned int end = (warp+1)*numTiles/totalWarps;
unsigned int index = get_local_id(0) & (TILE_SIZE - 1); unsigned int index = get_local_id(0) & (TILE_SIZE - 1);
if (numTiles > MAX_TILES) if (numTiles > maxTiles)
return; return;
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
float4 apos; float4 apos;
......
...@@ -17,14 +17,14 @@ typedef struct { ...@@ -17,14 +17,14 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer, void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
#else #else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0); unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
...@@ -36,7 +36,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -36,7 +36,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
...@@ -204,14 +204,14 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -204,14 +204,14 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii, __global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer, __global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize) { __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
#else #else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0); unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
...@@ -223,7 +223,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -223,7 +223,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
......
...@@ -17,7 +17,7 @@ typedef struct { ...@@ -17,7 +17,7 @@ typedef struct {
__kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer, void computeBornSum(__global float* global_bornSum, __global float4* posq, __global float2* global_params, __local AtomData* localData, __local float* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global unsigned int* interactionFlags) { __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -25,8 +25,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -25,8 +25,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
#else #else
unsigned int pos = warp*numTiles/totalWarps; unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps; unsigned int end = (warp+1)*numTiles/totalWarps;
...@@ -38,7 +38,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -38,7 +38,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
...@@ -123,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo ...@@ -123,7 +123,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
} }
localData[get_local_id(0)].bornSum = 0.0f; localData[get_local_id(0)].bornSum = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= MAX_TILES ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
if (flags == 0) { if (flags == 0) {
// No interactions in this tile. // No interactions in this tile.
...@@ -270,7 +270,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -270,7 +270,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global float4* posq, __global float* global_bornRadii, __global float4* posq, __global float* global_bornRadii,
__global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer, __global float* global_bornForce, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global unsigned int* interactionFlags) { __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags) {
#else #else
unsigned int numTiles) { unsigned int numTiles) {
#endif #endif
...@@ -278,8 +278,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -278,8 +278,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
#else #else
unsigned int pos = warp*numTiles/totalWarps; unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps; unsigned int end = (warp+1)*numTiles/totalWarps;
...@@ -291,7 +291,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -291,7 +291,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
...@@ -381,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff ...@@ -381,7 +381,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
localData[get_local_id(0)].fz = 0.0f; localData[get_local_id(0)].fz = 0.0f;
localData[get_local_id(0)].fw = 0.0f; localData[get_local_id(0)].fw = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= MAX_TILES ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions if (flags != 0xFFFFFFFF && false) { // TODO: Fix this: should be checking for exclusions
if (flags == 0) { if (flags == 0) {
// No interactions in this tile. // No interactions in this tile.
......
...@@ -15,15 +15,15 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) ...@@ -15,15 +15,15 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions, void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = get_group_id(0)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
#else #else
unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0); unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0); unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
...@@ -37,7 +37,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -37,7 +37,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
......
...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1))) ...@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions, void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
__global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __local AtomData* localData, __local float4* tempBuffer,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global unsigned int* interactionFlags __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -24,8 +24,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -24,8 +24,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
unsigned int warp = get_global_id(0)/TILE_SIZE; unsigned int warp = get_global_id(0)/TILE_SIZE;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int pos = warp*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
unsigned int end = (warp+1)*(numTiles > MAX_TILES ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps; unsigned int end = (warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/totalWarps;
#else #else
unsigned int pos = warp*numTiles/totalWarps; unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps; unsigned int end = (warp+1)*numTiles/totalWarps;
...@@ -39,7 +39,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -39,7 +39,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
// Extract the coordinates of this tile // Extract the coordinates of this tile
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= MAX_TILES) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
y = tileIndices.y; y = tileIndices.y;
...@@ -145,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe ...@@ -145,7 +145,7 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
localData[get_local_id(0)].fy = 0.0f; localData[get_local_id(0)].fy = 0.0f;
localData[get_local_id(0)].fz = 0.0f; localData[get_local_id(0)].fz = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= MAX_TILES ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) { if (flags == 0) {
// No interactions in this tile. // No interactions in this tile.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment