Unverified Commit 995c6318 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Fixed potential invalid memory access (#3428)

* Fixed potential invalid memory access

* Fixed exception
parent 446aaeb4
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. * * Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -51,7 +51,7 @@ public: ...@@ -51,7 +51,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
virtual void initialize(ComputeContext& context, int size, int elementSize, const std::string& name) = 0; virtual void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) = 0;
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -60,13 +60,13 @@ public: ...@@ -60,13 +60,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
virtual void resize(int size) = 0; virtual void resize(size_t size) = 0;
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -74,7 +74,7 @@ public: ...@@ -74,7 +74,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
virtual int getSize() const = 0; virtual size_t getSize() const = 0;
/** /**
* Get the size of each element in bytes. * Get the size of each element in bytes.
*/ */
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. * * Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -60,7 +60,7 @@ public: ...@@ -60,7 +60,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -69,13 +69,13 @@ public: ...@@ -69,13 +69,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -83,7 +83,7 @@ public: ...@@ -83,7 +83,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
int getSize() const; size_t getSize() const;
/** /**
* Get the size of each element in bytes. * Get the size of each element in bytes.
*/ */
......
...@@ -4734,7 +4734,7 @@ double CommonCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -4734,7 +4734,7 @@ double CommonCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
startIndicesKernel->execute(256, 256); startIndicesKernel->execute(256, 256);
copyPairsKernel->execute(maxNeighborPairs); copyPairsKernel->execute(maxNeighborPairs);
} }
int maxThreads = min(cc.getNumAtoms()*forceWorkgroupSize, cc.getEnergyBuffer().getSize()); int maxThreads = min(cc.getNumAtoms()*forceWorkgroupSize, (int) cc.getEnergyBuffer().getSize());
forceKernel->execute(maxThreads, forceWorkgroupSize); forceKernel->execute(maxThreads, forceWorkgroupSize);
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
// Make sure there was enough memory for the neighbor list. // Make sure there was enough memory for the neighbor list.
...@@ -5290,7 +5290,7 @@ void CommonCalcCustomCVForceKernel::initialize(const System& system, const Custo ...@@ -5290,7 +5290,7 @@ void CommonCalcCustomCVForceKernel::initialize(const System& system, const Custo
copyForcesKernel->addArg(cc.getPaddedNumAtoms()); copyForcesKernel->addArg(cc.getPaddedNumAtoms());
addForcesKernel = program->createKernel("addForces"); addForcesKernel = program->createKernel("addForces");
addForcesKernel->addArg(cc.getLongForceBuffer()); addForcesKernel->addArg(cc.getLongForceBuffer());
addForcesKernel->addArg(cc.getLongForceBuffer().getSize()); addForcesKernel->addArg((int) cc.getLongForceBuffer().getSize());
for (int i = 0; i < numCVs; i++) { for (int i = 0; i < numCVs; i++) {
addForcesKernel->addArg(); addForcesKernel->addArg();
addForcesKernel->addArg(); addForcesKernel->addArg();
...@@ -6143,7 +6143,7 @@ std::pair<double, double> CommonIntegrateNoseHooverStepKernel::computeMaskedKine ...@@ -6143,7 +6143,7 @@ std::pair<double, double> CommonIntegrateNoseHooverStepKernel::computeMaskedKine
reduceEnergyKernel->addArg(energyBuffer); reduceEnergyKernel->addArg(energyBuffer);
reduceEnergyKernel->addArg(kineticEnergyBuffer); reduceEnergyKernel->addArg(kineticEnergyBuffer);
reduceEnergyKernel->addArg(energyBuffer.getSize()); reduceEnergyKernel->addArg((int) energyBuffer.getSize());
} }
cc.clearBuffer(energyBuffer); cc.clearBuffer(energyBuffer);
...@@ -6507,7 +6507,7 @@ void CommonIntegrateVariableLangevinStepKernel::initialize(const System& system, ...@@ -6507,7 +6507,7 @@ void CommonIntegrateVariableLangevinStepKernel::initialize(const System& system,
selectSizeKernel = program->createKernel("selectLangevinStepSize"); selectSizeKernel = program->createKernel("selectLangevinStepSize");
params.initialize(cc, 3, cc.getUseDoublePrecision() || cc.getUseMixedPrecision() ? sizeof(double) : sizeof(float), "langevinParams"); params.initialize(cc, 3, cc.getUseDoublePrecision() || cc.getUseMixedPrecision() ? sizeof(double) : sizeof(float), "langevinParams");
blockSize = min(256, system.getNumParticles()); blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params.getSize()); blockSize = max(blockSize, (int) params.getSize());
} }
double CommonIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) { double CommonIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. * * Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -43,14 +43,14 @@ ArrayInterface& ComputeArray::getArray() { ...@@ -43,14 +43,14 @@ ArrayInterface& ComputeArray::getArray() {
return *impl; return *impl;
} }
void ComputeArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void ComputeArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (impl != NULL) if (impl != NULL)
throw OpenMMException("The array "+getName()+" has already been initialized"); throw OpenMMException("The array "+getName()+" has already been initialized");
impl = context.createArray(); impl = context.createArray();
impl->initialize(context, size, elementSize, name); impl->initialize(context, size, elementSize, name);
} }
void ComputeArray::resize(int size) { void ComputeArray::resize(size_t size) {
if (impl == NULL) if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized"); throw OpenMMException("ComputeArray has not been initialized");
impl->resize(size); impl->resize(size);
...@@ -60,7 +60,7 @@ bool ComputeArray::isInitialized() const { ...@@ -60,7 +60,7 @@ bool ComputeArray::isInitialized() const {
return (impl != NULL); return (impl != NULL);
} }
int ComputeArray::getSize() const { size_t ComputeArray::getSize() const {
if (impl == NULL) if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized"); throw OpenMMException("ComputeArray has not been initialized");
return impl->getSize(); return impl->getSize();
......
...@@ -602,7 +602,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -602,7 +602,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Set arguments for constraint kernels. // Set arguments for constraint kernels.
if (settleAtoms.isInitialized()) { if (settleAtoms.isInitialized()) {
settlePosKernel->addArg(settleAtoms.getSize()); settlePosKernel->addArg((int) settleAtoms.getSize());
settlePosKernel->addArg(); settlePosKernel->addArg();
settlePosKernel->addArg(context.getPosq()); settlePosKernel->addArg(context.getPosq());
settlePosKernel->addArg(posDelta); settlePosKernel->addArg(posDelta);
...@@ -611,7 +611,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -611,7 +611,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settlePosKernel->addArg(settleParams); settlePosKernel->addArg(settleParams);
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
settlePosKernel->addArg(context.getPosqCorrection()); settlePosKernel->addArg(context.getPosqCorrection());
settleVelKernel->addArg(settleAtoms.getSize()); settleVelKernel->addArg((int) settleAtoms.getSize());
settleVelKernel->addArg(); settleVelKernel->addArg();
settleVelKernel->addArg(context.getPosq()); settleVelKernel->addArg(context.getPosq());
settleVelKernel->addArg(posDelta); settleVelKernel->addArg(posDelta);
...@@ -622,7 +622,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -622,7 +622,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settleVelKernel->addArg(context.getPosqCorrection()); settleVelKernel->addArg(context.getPosqCorrection());
} }
if (shakeAtoms.isInitialized()) { if (shakeAtoms.isInitialized()) {
shakePosKernel->addArg(shakeAtoms.getSize()); shakePosKernel->addArg((int) shakeAtoms.getSize());
shakePosKernel->addArg(); shakePosKernel->addArg();
shakePosKernel->addArg(context.getPosq()); shakePosKernel->addArg(context.getPosq());
shakePosKernel->addArg(posDelta); shakePosKernel->addArg(posDelta);
...@@ -630,7 +630,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System ...@@ -630,7 +630,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
shakePosKernel->addArg(shakeParams); shakePosKernel->addArg(shakeParams);
if (context.getUseMixedPrecision()) if (context.getUseMixedPrecision())
shakePosKernel->addArg(context.getPosqCorrection()); shakePosKernel->addArg(context.getPosqCorrection());
shakeVelKernel->addArg(shakeAtoms.getSize()); shakeVelKernel->addArg((int) shakeAtoms.getSize());
shakeVelKernel->addArg(); shakeVelKernel->addArg();
shakeVelKernel->addArg(context.getPosq()); shakeVelKernel->addArg(context.getPosq());
shakeVelKernel->addArg(context.getVelm()); shakeVelKernel->addArg(context.getVelm());
...@@ -755,7 +755,7 @@ void IntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSe ...@@ -755,7 +755,7 @@ void IntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSe
random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random"); random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*64, "randomSeed"); randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*64, "randomSeed");
randomPos = random.getSize(); randomPos = random.getSize();
randomKernel->addArg(random.getSize()); randomKernel->addArg((int) random.getSize());
randomKernel->addArg(random); randomKernel->addArg(random);
randomKernel->addArg(randomSeed); randomKernel->addArg(randomSeed);
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -55,7 +55,7 @@ public: ...@@ -55,7 +55,7 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static CudaArray* create(CudaContext& context, int size, const std::string& name) { static CudaArray* create(CudaContext& context, size_t size, const std::string& name) {
return new CudaArray(context, size, sizeof(T), name); return new CudaArray(context, size, sizeof(T), name);
} }
/** /**
...@@ -71,7 +71,7 @@ public: ...@@ -71,7 +71,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
CudaArray(CudaContext& context, int size, int elementSize, const std::string& name); CudaArray(CudaContext& context, size_t size, int elementSize, const std::string& name);
~CudaArray(); ~CudaArray();
/** /**
* Initialize this object. * Initialize this object.
...@@ -81,7 +81,7 @@ public: ...@@ -81,7 +81,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -90,13 +90,13 @@ public: ...@@ -90,13 +90,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
int getSize() const { size_t getSize() const {
return size; return size;
} }
/** /**
...@@ -182,7 +182,8 @@ public: ...@@ -182,7 +182,8 @@ public:
private: private:
CudaContext* context; CudaContext* context;
CUdeviceptr pointer; CUdeviceptr pointer;
int size, elementSize; size_t size;
int elementSize;
bool ownsMemory; bool ownsMemory;
std::string name; std::string name;
}; };
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -338,7 +338,7 @@ private: ...@@ -338,7 +338,7 @@ private:
CudaArray rebuildNeighborList; CudaArray rebuildNeighborList;
CudaSort* blockSorter; CudaSort* blockSorter;
CUevent downloadCountEvent; CUevent downloadCountEvent;
int* pinnedCountBuffer; unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
...@@ -348,7 +348,8 @@ private: ...@@ -348,7 +348,8 @@ private:
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags; int startTileIndex, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs;
long long numTiles; long long numTiles;
std::string kernelSource; std::string kernelSource;
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2021 Stanford University and the Authors. * * Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -36,7 +36,7 @@ using namespace OpenMM; ...@@ -36,7 +36,7 @@ using namespace OpenMM;
CudaArray::CudaArray() : pointer(0), ownsMemory(false) { CudaArray::CudaArray() : pointer(0), ownsMemory(false) {
} }
CudaArray::CudaArray(CudaContext& context, int size, int elementSize, const std::string& name) : pointer(0) { CudaArray::CudaArray(CudaContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name); initialize(context, size, elementSize, name);
} }
...@@ -52,7 +52,7 @@ CudaArray::~CudaArray() { ...@@ -52,7 +52,7 @@ CudaArray::~CudaArray() {
} }
} }
void CudaArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void CudaArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0) if (this->pointer != 0)
throw OpenMMException("CudaArray has already been initialized"); throw OpenMMException("CudaArray has already been initialized");
this->context = &dynamic_cast<CudaContext&>(context); this->context = &dynamic_cast<CudaContext&>(context);
...@@ -69,7 +69,7 @@ void CudaArray::initialize(ComputeContext& context, int size, int elementSize, c ...@@ -69,7 +69,7 @@ void CudaArray::initialize(ComputeContext& context, int size, int elementSize, c
} }
} }
void CudaArray::resize(int size) { void CudaArray::resize(size_t size) {
if (pointer == 0) if (pointer == 0)
throw OpenMMException("CudaArray has not been initialized"); throw OpenMMException("CudaArray has not been initialized");
if (!ownsMemory) if (!ownsMemory)
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -71,7 +71,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c ...@@ -71,7 +71,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice())); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0)); CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(int), CU_MEMHOSTALLOC_PORTABLE)); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors; numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256); forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
setKernelSource(CudaKernelSources::nonbonded); setKernelSource(CudaKernelSources::nonbonded);
...@@ -430,12 +430,13 @@ bool CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -430,12 +430,13 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future. // this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) { if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]); maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
interactingTiles.resize(maxTiles); interactingTiles.resize(maxTiles);
interactingAtoms.resize(CudaContext::TileSize*maxTiles); interactingAtoms.resize(CudaContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer(); forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer(); findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
...@@ -444,7 +445,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -444,7 +445,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
findInteractingBlocksArgs[7] = &interactingAtoms.getDevicePointer(); findInteractingBlocksArgs[7] = &interactingAtoms.getDevicePointer();
} }
if (pinnedCountBuffer[1] > maxSinglePairs) { if (pinnedCountBuffer[1] > maxSinglePairs) {
maxSinglePairs = (int) (1.2*pinnedCountBuffer[1]); maxSinglePairs = (unsigned int) (1.2*pinnedCountBuffer[1]);
singlePairs.resize(maxSinglePairs); singlePairs.resize(maxSinglePairs);
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[19] = &singlePairs.getDevicePointer(); forceArgs[19] = &singlePairs.getDevicePointer();
......
...@@ -76,7 +76,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co ...@@ -76,7 +76,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
} }
} }
__device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsigned int maxSinglePairs, unsigned int* singlePairCount, int2* singlePairs, int* sumBuffer, volatile int& pairStartIndex) { __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsigned int maxSinglePairs, unsigned int* singlePairCount, int2* singlePairs, int* sumBuffer, volatile unsigned int& pairStartIndex) {
// Record interactions that should be computed as single pairs rather than in blocks. // Record interactions that should be computed as single pairs rather than in blocks.
const int indexInWarp = threadIdx.x%32; const int indexInWarp = threadIdx.x%32;
...@@ -95,7 +95,7 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign ...@@ -95,7 +95,7 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign
pairStartIndex = atomicAdd(singlePairCount,(unsigned int) sum); pairStartIndex = atomicAdd(singlePairCount,(unsigned int) sum);
__syncwarp(); __syncwarp();
int prevSum = __shfl_up_sync(0xffffffff, sum, 1); int prevSum = __shfl_up_sync(0xffffffff, sum, 1);
int pairIndex = pairStartIndex + (indexInWarp > 0 ? prevSum : 0); unsigned int pairIndex = pairStartIndex + (indexInWarp > 0 ? prevSum : 0);
for (int i = indexInWarp; i < length; i += 32) { for (int i = indexInWarp; i < length; i += 32) {
int count = __popc(flags[i]); int count = __popc(flags[i]);
if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count <= maxSinglePairs) { if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count <= maxSinglePairs) {
...@@ -196,14 +196,14 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac ...@@ -196,14 +196,14 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
__shared__ int workgroupFlagsBuffer[BUFFER_SIZE*(GROUP_SIZE/32)]; __shared__ int workgroupFlagsBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__shared__ int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)]; __shared__ int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__shared__ real4 posBuffer[GROUP_SIZE]; __shared__ real4 posBuffer[GROUP_SIZE];
__shared__ volatile int workgroupTileIndex[GROUP_SIZE/32]; __shared__ volatile unsigned int workgroupTileIndex[GROUP_SIZE/32];
__shared__ int worksgroupPairStartIndex[GROUP_SIZE/32]; __shared__ unsigned int workgroupPairStartIndex[GROUP_SIZE/32];
int* sumBuffer = (int*) posBuffer; // Reuse the same buffer to save memory int* sumBuffer = (int*) posBuffer; // Reuse the same buffer to save memory
int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32); int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
int* flagsBuffer = workgroupFlagsBuffer+BUFFER_SIZE*(warpStart/32); int* flagsBuffer = workgroupFlagsBuffer+BUFFER_SIZE*(warpStart/32);
int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32); int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
volatile int& tileStartIndex = workgroupTileIndex[warpStart/32]; volatile unsigned int& tileStartIndex = workgroupTileIndex[warpStart/32];
volatile int& pairStartIndex = worksgroupPairStartIndex[warpStart/32]; volatile unsigned int& pairStartIndex = workgroupPairStartIndex[warpStart/32];
// Loop over blocks. // Loop over blocks.
...@@ -342,11 +342,11 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac ...@@ -342,11 +342,11 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
#if MAX_BITS_FOR_PAIRS > 0 #if MAX_BITS_FOR_PAIRS > 0
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex); neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif #endif
int tilesToStore = neighborsInBuffer/TILE_SIZE; unsigned int tilesToStore = neighborsInBuffer/TILE_SIZE;
if (tilesToStore > 0) { if (tilesToStore > 0) {
if (indexInWarp == 0) if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore); tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex; unsigned int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x; interactingTiles[newTileStartIndex+indexInWarp] = x;
...@@ -369,10 +369,10 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac ...@@ -369,10 +369,10 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex); neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif #endif
if (neighborsInBuffer > 0) { if (neighborsInBuffer > 0) {
int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE; unsigned int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
if (indexInWarp == 0) if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore); tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex; unsigned int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x; interactingTiles[newTileStartIndex+indexInWarp] = x;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
* @param flags the set of flags to specify when creating the OpenCL Buffer * @param flags the set of flags to specify when creating the OpenCL Buffer
*/ */
template <class T> template <class T>
static OpenCLArray* create(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) { static OpenCLArray* create(OpenCLContext& context, size_t size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
return new OpenCLArray(context, size, sizeof(T), name, flags); return new OpenCLArray(context, size, sizeof(T), name, flags);
} }
/** /**
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) { static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, size_t size, const std::string& name) {
return new OpenCLArray(context, buffer, size, sizeof(T), name); return new OpenCLArray(context, buffer, size, sizeof(T), name);
} }
/** /**
...@@ -89,7 +89,7 @@ public: ...@@ -89,7 +89,7 @@ public:
* @param name the name of the array * @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer * @param flags the set of flags to specify when creating the OpenCL Buffer
*/ */
OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE); OpenCLArray(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
/** /**
* Create an OpenCLArray object that uses a preexisting Buffer. * Create an OpenCLArray object that uses a preexisting Buffer.
* *
...@@ -99,7 +99,7 @@ public: ...@@ -99,7 +99,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name); OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name);
~OpenCLArray(); ~OpenCLArray();
/** /**
* Initialize this array. * Initialize this array.
...@@ -109,7 +109,7 @@ public: ...@@ -109,7 +109,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. * Initialize this object.
* *
...@@ -119,7 +119,7 @@ public: ...@@ -119,7 +119,7 @@ public:
* @param name the name of the array * @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer * @param flags the set of flags to specify when creating the OpenCL Buffer
*/ */
void initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags); void initialize(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags);
/** /**
* Initialize this object to use a preexisting Buffer. * Initialize this object to use a preexisting Buffer.
* *
...@@ -129,7 +129,7 @@ public: ...@@ -129,7 +129,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name); void initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -139,7 +139,7 @@ public: ...@@ -139,7 +139,7 @@ public:
* @param flags the set of flags to specify when creating the OpenCL Buffer * @param flags the set of flags to specify when creating the OpenCL Buffer
*/ */
template <class T> template <class T>
void initialize(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) { void initialize(OpenCLContext& context, size_t size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
initialize(context, size, sizeof(T), name, flags); initialize(context, size, sizeof(T), name, flags);
} }
/** /**
...@@ -152,13 +152,13 @@ public: ...@@ -152,13 +152,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) { void initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, const std::string& name) {
initialize(context, buffer, size, sizeof(T), name); initialize(context, buffer, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -168,7 +168,7 @@ public: ...@@ -168,7 +168,7 @@ public:
/** /**
* Get the size of the array. * Get the size of the array.
*/ */
int getSize() const { size_t getSize() const {
return size; return size;
} }
/** /**
...@@ -241,7 +241,8 @@ public: ...@@ -241,7 +241,8 @@ public:
private: private:
OpenCLContext* context; OpenCLContext* context;
cl::Buffer* buffer; cl::Buffer* buffer;
int size, elementSize; size_t size;
int elementSize;
cl_int flags; cl_int flags;
bool ownsBuffer; bool ownsBuffer;
std::string name; std::string name;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -322,7 +322,7 @@ private: ...@@ -322,7 +322,7 @@ private:
OpenCLSort* blockSorter; OpenCLSort* blockSorter;
cl::Event downloadCountEvent; cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer; cl::Buffer* pinnedCountBuffer;
int* pinnedCountMemory; unsigned int* pinnedCountMemory;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2021 Stanford University and the Authors. * * Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -35,11 +35,11 @@ using namespace OpenMM; ...@@ -35,11 +35,11 @@ using namespace OpenMM;
OpenCLArray::OpenCLArray() : buffer(NULL), ownsBuffer(false) { OpenCLArray::OpenCLArray() : buffer(NULL), ownsBuffer(false) {
} }
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) : buffer(NULL) { OpenCLArray::OpenCLArray(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags) : buffer(NULL) {
initialize(context, size, elementSize, name, flags); initialize(context, size, elementSize, name, flags);
} }
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) : buffer(NULL) { OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name) : buffer(NULL) {
initialize(context, buffer, size, elementSize, name); initialize(context, buffer, size, elementSize, name);
} }
...@@ -48,11 +48,11 @@ OpenCLArray::~OpenCLArray() { ...@@ -48,11 +48,11 @@ OpenCLArray::~OpenCLArray() {
delete buffer; delete buffer;
} }
void OpenCLArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void OpenCLArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
initialize(dynamic_cast<OpenCLContext&>(context), size, elementSize, name, CL_MEM_READ_WRITE); initialize(dynamic_cast<OpenCLContext&>(context), size, elementSize, name, CL_MEM_READ_WRITE);
} }
void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) { void OpenCLArray::initialize(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags) {
if (buffer != NULL) if (buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized"); throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context; this->context = &context;
...@@ -71,7 +71,7 @@ void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, ...@@ -71,7 +71,7 @@ void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize,
} }
} }
void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) { void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name) {
if (this->buffer != NULL) if (this->buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized"); throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context; this->context = &context;
...@@ -82,7 +82,7 @@ void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int siz ...@@ -82,7 +82,7 @@ void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int siz
ownsBuffer = false; ownsBuffer = false;
} }
void OpenCLArray::resize(int size) { void OpenCLArray::resize(size_t size) {
if (buffer == NULL) if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized"); throw OpenMMException("OpenCLArray has not been initialized");
if (!ownsBuffer) if (!ownsBuffer)
......
...@@ -532,9 +532,9 @@ void OpenCLContext::initialize() { ...@@ -532,9 +532,9 @@ void OpenCLContext::initialize() {
energyParamDerivBuffer.initialize<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer"); energyParamDerivBuffer.initialize<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(energyParamDerivBuffer); addAutoclearBuffer(energyParamDerivBuffer);
} }
int bufferBytes = max(max(velm.getSize()*velm.getElementSize(), int bufferBytes = max(max((int) velm.getSize()*velm.getElementSize(),
energyBufferSize*energyBuffer.getElementSize()), energyBufferSize*energyBuffer.getElementSize()),
longForceBuffer.getSize()*longForceBuffer.getElementSize()); (int) longForceBuffer.getSize()*longForceBuffer.getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2020 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -89,8 +89,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con ...@@ -89,8 +89,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize; numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
} }
} }
pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(int)); pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(unsigned int));
pinnedCountMemory = (int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int)); pinnedCountMemory = (unsigned int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int));
setKernelSource(deviceIsCpu ? OpenCLKernelSources::nonbonded_cpu : OpenCLKernelSources::nonbonded); setKernelSource(deviceIsCpu ? OpenCLKernelSources::nonbonded_cpu : OpenCLKernelSources::nonbonded);
} }
...@@ -395,18 +395,19 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include ...@@ -395,18 +395,19 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
bool OpenCLNonbondedUtilities::updateNeighborListSize() { bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (pinnedCountMemory[0] <= (unsigned int) interactingTiles.getSize()) if (pinnedCountMemory[0] <= interactingTiles.getSize())
return false; return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future. // this from happening in the future.
int maxTiles = (int) (1.2*pinnedCountMemory[0]); unsigned int maxTiles = (unsigned int) (1.2*pinnedCountMemory[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
interactingTiles.resize(maxTiles); interactingTiles.resize(maxTiles);
interactingAtoms.resize(OpenCLContext::TileSize*maxTiles); interactingAtoms.resize(OpenCLContext::TileSize*(size_t) maxTiles);
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) { for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
KernelSet& kernels = iter->second; KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) { if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
......
...@@ -97,12 +97,12 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -97,12 +97,12 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
__local int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)]; __local int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__local int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)]; __local int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__local real3 posBuffer[GROUP_SIZE]; __local real3 posBuffer[GROUP_SIZE];
__local volatile int workgroupTileIndex[GROUP_SIZE/32]; __local volatile unsigned int workgroupTileIndex[GROUP_SIZE/32];
__local bool includeBlockFlags[GROUP_SIZE]; __local bool includeBlockFlags[GROUP_SIZE];
__local volatile short2 atomCountBuffer[GROUP_SIZE]; __local volatile short2 atomCountBuffer[GROUP_SIZE];
__local int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32); __local int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
__local int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32); __local int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
__local volatile int* tileStartIndex = workgroupTileIndex+(warpStart/32); __local volatile unsigned int* tileStartIndex = workgroupTileIndex+(warpStart/32);
// Loop over blocks. // Loop over blocks.
...@@ -233,11 +233,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -233,11 +233,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (neighborsInBuffer > BUFFER_SIZE-TILE_SIZE) { if (neighborsInBuffer > BUFFER_SIZE-TILE_SIZE) {
// Store the new tiles to memory. // Store the new tiles to memory.
int tilesToStore = neighborsInBuffer/TILE_SIZE; unsigned int tilesToStore = neighborsInBuffer/TILE_SIZE;
if (indexInWarp == 0) if (indexInWarp == 0)
*tileStartIndex = atom_add(interactionCount, tilesToStore); *tileStartIndex = atom_add(interactionCount, tilesToStore);
SYNC_WARPS; SYNC_WARPS;
int newTileStartIndex = *tileStartIndex; unsigned int newTileStartIndex = *tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x; interactingTiles[newTileStartIndex+indexInWarp] = x;
...@@ -255,11 +255,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -255,11 +255,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
// If we have a partially filled buffer, store it to memory. // If we have a partially filled buffer, store it to memory.
if (neighborsInBuffer > 0) { if (neighborsInBuffer > 0) {
int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE; unsigned int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
if (indexInWarp == 0) if (indexInWarp == 0)
*tileStartIndex = atom_add(interactionCount, tilesToStore); *tileStartIndex = atom_add(interactionCount, tilesToStore);
SYNC_WARPS; SYNC_WARPS;
int newTileStartIndex = *tileStartIndex; unsigned int newTileStartIndex = *tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x; interactingTiles[newTileStartIndex+indexInWarp] = x;
......
...@@ -1255,7 +1255,7 @@ void CommonCalcAmoebaMultipoleForceKernel::computeInducedField() { ...@@ -1255,7 +1255,7 @@ void CommonCalcAmoebaMultipoleForceKernel::computeInducedField() {
computeInducedFieldKernel->setArg(7, numTileIndices); computeInducedFieldKernel->setArg(7, numTileIndices);
if (usePME) { if (usePME) {
setPeriodicBoxArgs(cc, computeInducedFieldKernel, 10); setPeriodicBoxArgs(cc, computeInducedFieldKernel, 10);
computeInducedFieldKernel->setArg(15, nb.getInteractingTiles().getSize()); computeInducedFieldKernel->setArg(15, (int) nb.getInteractingTiles().getSize());
} }
cc.clearBuffer(inducedField); cc.clearBuffer(inducedField);
cc.clearBuffer(inducedFieldPolar); cc.clearBuffer(inducedFieldPolar);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment