Commit 73183c61 authored by ChayaSt's avatar ChayaSt
Browse files

resolved conflict

parents 0e218233 32e08b87
...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) { ...@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system.addParticle(0.0); system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false", CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()), platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1); platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
CudaContext& context = *platformData.contexts[0]; CudaContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
CudaArray data(context, array.size(), 4, "sortData"); CudaArray data(context, array.size(), 4, "sortData");
......
...@@ -408,6 +408,18 @@ public: ...@@ -408,6 +408,18 @@ public:
void setStepsSinceReorder(int steps) { void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps; stepsSinceReorder = steps;
} }
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool getForcesValid() const {
return forcesValid;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void setForcesValid(bool valid) {
forcesValid = valid;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -684,7 +696,7 @@ private: ...@@ -684,7 +696,7 @@ private:
int numThreadBlocks; int numThreadBlocks;
int numForceBuffers; int numForceBuffers;
int simdWidth; int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic; bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic, forcesValid;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ; mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble; mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
......
...@@ -156,7 +156,7 @@ public: ...@@ -156,7 +156,7 @@ public:
* @param b the vector defining the second edge of the periodic box * @param b the vector defining the second edge of the periodic box
* @param c the vector defining the third edge of the periodic box * @param c the vector defining the third edge of the periodic box
*/ */
void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const; void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c);
/** /**
* Create a checkpoint recording the current state of the Context. * Create a checkpoint recording the current state of the Context.
* *
...@@ -698,7 +698,7 @@ public: ...@@ -698,7 +698,7 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource); void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
......
...@@ -281,6 +281,9 @@ private: ...@@ -281,6 +281,9 @@ private:
OpenCLArray* oldPositions; OpenCLArray* oldPositions;
OpenCLArray* rebuildNeighborList; OpenCLArray* rebuildNeighborList;
OpenCLSort* blockSorter; OpenCLSort* blockSorter;
cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer;
int* pinnedCountMemory;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
......
...@@ -58,14 +58,14 @@ public: ...@@ -58,14 +58,14 @@ public:
* This is the name of the parameter for selecting which OpenCL device or devices to use. * This is the name of the parameter for selecting which OpenCL device or devices to use.
*/ */
static const std::string& OpenCLDeviceIndex() { static const std::string& OpenCLDeviceIndex() {
static const std::string key = "OpenCLDeviceIndex"; static const std::string key = "DeviceIndex";
return key; return key;
} }
/** /**
* This is the name of the parameter that reports the OpenCL device or devices being used. * This is the name of the parameter that reports the OpenCL device or devices being used.
*/ */
static const std::string& OpenCLDeviceName() { static const std::string& OpenCLDeviceName() {
static const std::string key = "OpenCLDeviceName"; static const std::string key = "DeviceName";
return key; return key;
} }
/** /**
...@@ -86,21 +86,21 @@ public: ...@@ -86,21 +86,21 @@ public:
* This is the name of the parameter for selecting what numerical precision to use. * This is the name of the parameter for selecting what numerical precision to use.
*/ */
static const std::string& OpenCLPrecision() { static const std::string& OpenCLPrecision() {
static const std::string key = "OpenCLPrecision"; static const std::string key = "Precision";
return key; return key;
} }
/** /**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation. * This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/ */
static const std::string& OpenCLUseCpuPme() { static const std::string& OpenCLUseCpuPme() {
static const std::string key = "OpenCLUseCpuPme"; static const std::string key = "UseCpuPme";
return key; return key;
} }
/** /**
* This is the name of the parameter for selecting whether to disable use of a separate stream for PME. * This is the name of the parameter for selecting whether to disable use of a separate stream for PME.
*/ */
static const std::string& OpenCLDisablePmeStream() { static const std::string& OpenCLDisablePmeStream() {
static const std::string key = "OpenCLDisablePmeStream"; static const std::string key = "DisablePmeStream";
return key; return key;
} }
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -84,7 +84,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -84,7 +84,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
useMixedPrecision = false; useMixedPrecision = false;
} }
else else
throw OpenMMException("Illegal value for OpenCLPrecision: "+precision); throw OpenMMException("Illegal value for Precision: "+precision);
try { try {
contextIndex = platformData.contexts.size(); contextIndex = platformData.contexts.size();
std::vector<cl::Platform> platforms; std::vector<cl::Platform> platforms;
...@@ -105,7 +105,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -105,7 +105,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
vector<cl::Device> devices; vector<cl::Device> devices;
platforms[j].getDevices(CL_DEVICE_TYPE_ALL, &devices); platforms[j].getDevices(CL_DEVICE_TYPE_ALL, &devices);
if (deviceIndex < -1 || deviceIndex >= (int) devices.size()) if (deviceIndex < -1 || deviceIndex >= (int) devices.size())
throw OpenMMException("Illegal value for OpenCLDeviceIndex: "+intToString(deviceIndex)); throw OpenMMException("Illegal value for DeviceIndex: "+intToString(deviceIndex));
for (int i = 0; i < (int) devices.size(); i++) { for (int i = 0; i < (int) devices.size(); i++) {
// If they supplied a valid deviceIndex, we only look through that one // If they supplied a valid deviceIndex, we only look through that one
...@@ -113,6 +113,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -113,6 +113,11 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
continue; continue;
if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU)) if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU))
continue; // The CPU device on OS X won't work correctly. continue; // The CPU device on OS X won't work correctly.
if (useMixedPrecision || useDoublePrecision) {
bool supportsDouble = (devices[i].getInfo<CL_DEVICE_EXTENSIONS>().find("cl_khr_fp64") != string::npos);
if (!supportsDouble)
continue; // This device does not support double precision.
}
int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0]; int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
int processingElementsPerComputeUnit = 8; int processingElementsPerComputeUnit = 8;
if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) { if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
...@@ -1047,7 +1052,6 @@ void OpenCLContext::reorderAtoms() { ...@@ -1047,7 +1052,6 @@ void OpenCLContext::reorderAtoms() {
reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>(); reorderAtomsImpl<cl_float, mm_float4, cl_double, mm_double4>();
else else
reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>(); reorderAtomsImpl<cl_float, mm_float4, cl_float, mm_float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -174,8 +174,8 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n"; out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n"; out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n"; out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n"; out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n"; out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n"; out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n"; out << "float4 c[4];\n";
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
...@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -217,9 +217,9 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n"; out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n"; out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n"; out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n"; out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n"; out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n"; out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n"; out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
out << "float4 c[16];\n"; out << "float4 c[16];\n";
for (int j = 0; j < 16; j++) for (int j = 0; j < 16; j++)
...@@ -254,7 +254,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -254,7 +254,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for (int k = 3; k >= 0; k--) for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) { for (int m = 0; m < 4; m++) {
int base = 4*m; int base = 4*m;
string suffix = suffixes[m]; string suffix = suffixes[k];
out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n"; out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n";
} }
out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n"; out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
...@@ -271,7 +271,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -271,7 +271,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n"; out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n";
} }
else else
throw OpenMMException("Unsupported derivative order for Continuous2DFunction"); throw OpenMMException("Unsupported derivative order for Continuous3DFunction");
} }
out << "}\n"; out << "}\n";
} }
......
This diff is collapsed.
...@@ -57,7 +57,7 @@ private: ...@@ -57,7 +57,7 @@ private:
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true), OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL), interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) { oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), pinnedCountMemory(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks and force buffers to use. // Decide how many thread blocks and force buffers to use.
deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
...@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con ...@@ -90,6 +90,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize; numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
} }
} }
pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(int));
pinnedCountMemory = (int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int));
} }
OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
...@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { ...@@ -123,6 +125,8 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete rebuildNeighborList; delete rebuildNeighborList;
if (blockSorter != NULL) if (blockSorter != NULL)
delete blockSorter; delete blockSorter;
if (pinnedCountBuffer != NULL)
delete pinnedCountBuffer;
} }
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) { void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
...@@ -357,20 +361,16 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) { ...@@ -357,20 +361,16 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance) if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
bool rebuild = false; setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
do { context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1); blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms()); kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList);
blockSorter->sort(*sortedBlocks); context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms());
kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList); setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms()); context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0); forceRebuildNeighborList = false;
context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while (rebuild);
lastCutoff = kernels.cutoffDistance; lastCutoff = kernels.cutoffDistance;
context.getQueue().enqueueReadBuffer(interactionCount->getDeviceBuffer(), CL_FALSE, 0, sizeof(int), pinnedCountMemory, NULL, &downloadCountEvent);
} }
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) { void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
...@@ -385,20 +385,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include ...@@ -385,20 +385,22 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs(context, kernel, 9); setPeriodicBoxArgs(context, kernel, 9);
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
} }
if (useCutoff && numTiles > 0) {
downloadCountEvent.wait();
updateNeighborListSize();
}
} }
bool OpenCLNonbondedUtilities::updateNeighborListSize() { bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer(); if (pinnedCountMemory[0] <= (unsigned int) interactingTiles->getSize())
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) interactingTiles->getSize())
return false; return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future. // this from happening in the future.
int maxTiles = (int) (1.2*pinnedInteractionCount[0]); int maxTiles = (int) (1.2*pinnedCountMemory[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
...@@ -430,6 +432,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -430,6 +432,7 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles); kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
} }
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
context.setForcesValid(false);
return true; return true;
} }
......
...@@ -56,6 +56,11 @@ extern "C" OPENMM_EXPORT_OPENCL void registerPlatforms() { ...@@ -56,6 +56,11 @@ extern "C" OPENMM_EXPORT_OPENCL void registerPlatforms() {
#endif #endif
OpenCLPlatform::OpenCLPlatform() { OpenCLPlatform::OpenCLPlatform() {
deprecatedPropertyReplacements["OpenCLDeviceIndex"] = OpenCLDeviceIndex();
deprecatedPropertyReplacements["OpenCLDeviceName"] = OpenCLDeviceName();
deprecatedPropertyReplacements["OpenCLPrecision"] = OpenCLPrecision();
deprecatedPropertyReplacements["OpenCLUseCpuPme"] = OpenCLUseCpuPme();
deprecatedPropertyReplacements["OpenCLDisablePmeStream"] = OpenCLDisablePmeStream();
OpenCLKernelFactory* factory = new OpenCLKernelFactory(); OpenCLKernelFactory* factory = new OpenCLKernelFactory();
registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory); registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
registerKernelFactory(UpdateStateDataKernel::Name(), factory); registerKernelFactory(UpdateStateDataKernel::Name(), factory);
...@@ -139,7 +144,10 @@ bool OpenCLPlatform::isPlatformSupported() { ...@@ -139,7 +144,10 @@ bool OpenCLPlatform::isPlatformSupported() {
const string& OpenCLPlatform::getPropertyValue(const Context& context, const string& property) const { const string& OpenCLPlatform::getPropertyValue(const Context& context, const string& property) const {
const ContextImpl& impl = getContextImpl(context); const ContextImpl& impl = getContextImpl(context);
const PlatformData* data = reinterpret_cast<const PlatformData*>(impl.getPlatformData()); const PlatformData* data = reinterpret_cast<const PlatformData*>(impl.getPlatformData());
map<string, string>::const_iterator value = data->propertyValues.find(property); string propertyName = property;
if (deprecatedPropertyReplacements.find(property) != deprecatedPropertyReplacements.end())
propertyName = deprecatedPropertyReplacements.find(property)->second;
map<string, string>::const_iterator value = data->propertyValues.find(propertyName);
if (value != data->propertyValues.end()) if (value != data->propertyValues.end())
return value->second; return value->second;
return Platform::getPropertyValue(context, property); return Platform::getPropertyValue(context, property);
......
real4 v0 = pos2-pos1; real4 v0 = pos2-pos1;
real4 v1 = pos2-pos3; real4 v1 = pos2-pos3;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real4 cp = cross(v0, v1); real4 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z; real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f); rp = max(SQRT(rp), (real) 1.0e-06f);
......
real4 delta = pos2-pos1; real4 delta = pos2-pos1;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z); real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE COMPUTE_FORCE
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f; dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
......
...@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f; ...@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f;
real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f); real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f); real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f); real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0a)
APPLY_PERIODIC_TO_DELTA(v1a)
APPLY_PERIODIC_TO_DELTA(v2a)
#endif
real4 cp0a = cross(v0a, v1a); real4 cp0a = cross(v0a, v1a);
real4 cp1a = cross(v1a, v2a); real4 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a)); real cosangle = dot(normalize(cp0a), normalize(cp1a));
...@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI); ...@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f); real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f); real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f); real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real4 cp0b = cross(v0b, v1b); real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b); real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b)); cosangle = dot(normalize(cp0b), normalize(cp1b));
......
...@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global ...@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/ */
real4 delta(real4 vec1, real4 vec2) { real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
} }
...@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds. * Compute the forces on groups based on the bonds.
*/ */
__kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions, __kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions,
__global const int* restrict bondGroups __global const int* restrict bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) { EXTRA_ARGS) {
mixed energy = 0; mixed energy = 0;
for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) {
......
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/ */
real4 ccb_delta(real4 vec1, real4 vec2) { real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
} }
......
...@@ -181,6 +181,8 @@ __kernel void computeN2Energy( ...@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -204,42 +206,38 @@ __kernel void computeN2Energy( ...@@ -204,42 +206,38 @@ __kernel void computeN2Energy(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -201,6 +201,8 @@ __kernel void computeN2Energy( ...@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -220,35 +222,31 @@ __kernel void computeN2Energy( ...@@ -220,35 +222,31 @@ __kernel void computeN2Energy(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
...@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -178,42 +180,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -178,42 +180,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -188,35 +190,31 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -188,35 +190,31 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment