Unverified Commit 995c6318 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Fixed potential invalid memory access (#3428)

* Fixed potential invalid memory access

* Fixed exception
parent 446aaeb4
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. *
* Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -51,7 +51,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
virtual void initialize(ComputeContext& context, int size, int elementSize, const std::string& name) = 0;
virtual void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) = 0;
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -60,13 +60,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) {
void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
virtual void resize(int size) = 0;
virtual void resize(size_t size) = 0;
/**
* Get whether this array has been initialized.
*/
......@@ -74,7 +74,7 @@ public:
/**
* Get the number of elements in the array.
*/
virtual int getSize() const = 0;
virtual size_t getSize() const = 0;
/**
* Get the size of each element in bytes.
*/
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. *
* Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -60,7 +60,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name);
void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -69,13 +69,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) {
void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
void resize(size_t size);
/**
* Get whether this array has been initialized.
*/
......@@ -83,7 +83,7 @@ public:
/**
* Get the number of elements in the array.
*/
int getSize() const;
size_t getSize() const;
/**
* Get the size of each element in bytes.
*/
......
......@@ -4734,7 +4734,7 @@ double CommonCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
startIndicesKernel->execute(256, 256);
copyPairsKernel->execute(maxNeighborPairs);
}
int maxThreads = min(cc.getNumAtoms()*forceWorkgroupSize, cc.getEnergyBuffer().getSize());
int maxThreads = min(cc.getNumAtoms()*forceWorkgroupSize, (int) cc.getEnergyBuffer().getSize());
forceKernel->execute(maxThreads, forceWorkgroupSize);
if (nonbondedMethod != NoCutoff) {
// Make sure there was enough memory for the neighbor list.
......@@ -5290,7 +5290,7 @@ void CommonCalcCustomCVForceKernel::initialize(const System& system, const Custo
copyForcesKernel->addArg(cc.getPaddedNumAtoms());
addForcesKernel = program->createKernel("addForces");
addForcesKernel->addArg(cc.getLongForceBuffer());
addForcesKernel->addArg(cc.getLongForceBuffer().getSize());
addForcesKernel->addArg((int) cc.getLongForceBuffer().getSize());
for (int i = 0; i < numCVs; i++) {
addForcesKernel->addArg();
addForcesKernel->addArg();
......@@ -6143,7 +6143,7 @@ std::pair<double, double> CommonIntegrateNoseHooverStepKernel::computeMaskedKine
reduceEnergyKernel->addArg(energyBuffer);
reduceEnergyKernel->addArg(kineticEnergyBuffer);
reduceEnergyKernel->addArg(energyBuffer.getSize());
reduceEnergyKernel->addArg((int) energyBuffer.getSize());
}
cc.clearBuffer(energyBuffer);
......@@ -6507,7 +6507,7 @@ void CommonIntegrateVariableLangevinStepKernel::initialize(const System& system,
selectSizeKernel = program->createKernel("selectLangevinStepSize");
params.initialize(cc, 3, cc.getUseDoublePrecision() || cc.getUseMixedPrecision() ? sizeof(double) : sizeof(float), "langevinParams");
blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params.getSize());
blockSize = max(blockSize, (int) params.getSize());
}
double CommonIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019-2021 Stanford University and the Authors. *
* Portions copyright (c) 2019-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -43,14 +43,14 @@ ArrayInterface& ComputeArray::getArray() {
return *impl;
}
void ComputeArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
void ComputeArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (impl != NULL)
throw OpenMMException("The array "+getName()+" has already been initialized");
impl = context.createArray();
impl->initialize(context, size, elementSize, name);
}
void ComputeArray::resize(int size) {
void ComputeArray::resize(size_t size) {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
impl->resize(size);
......@@ -60,7 +60,7 @@ bool ComputeArray::isInitialized() const {
return (impl != NULL);
}
int ComputeArray::getSize() const {
size_t ComputeArray::getSize() const {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
return impl->getSize();
......
......@@ -602,7 +602,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
// Set arguments for constraint kernels.
if (settleAtoms.isInitialized()) {
settlePosKernel->addArg(settleAtoms.getSize());
settlePosKernel->addArg((int) settleAtoms.getSize());
settlePosKernel->addArg();
settlePosKernel->addArg(context.getPosq());
settlePosKernel->addArg(posDelta);
......@@ -611,7 +611,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settlePosKernel->addArg(settleParams);
if (context.getUseMixedPrecision())
settlePosKernel->addArg(context.getPosqCorrection());
settleVelKernel->addArg(settleAtoms.getSize());
settleVelKernel->addArg((int) settleAtoms.getSize());
settleVelKernel->addArg();
settleVelKernel->addArg(context.getPosq());
settleVelKernel->addArg(posDelta);
......@@ -622,7 +622,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
settleVelKernel->addArg(context.getPosqCorrection());
}
if (shakeAtoms.isInitialized()) {
shakePosKernel->addArg(shakeAtoms.getSize());
shakePosKernel->addArg((int) shakeAtoms.getSize());
shakePosKernel->addArg();
shakePosKernel->addArg(context.getPosq());
shakePosKernel->addArg(posDelta);
......@@ -630,7 +630,7 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
shakePosKernel->addArg(shakeParams);
if (context.getUseMixedPrecision())
shakePosKernel->addArg(context.getPosqCorrection());
shakeVelKernel->addArg(shakeAtoms.getSize());
shakeVelKernel->addArg((int) shakeAtoms.getSize());
shakeVelKernel->addArg();
shakeVelKernel->addArg(context.getPosq());
shakeVelKernel->addArg(context.getVelm());
......@@ -755,7 +755,7 @@ void IntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSe
random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*64, "randomSeed");
randomPos = random.getSize();
randomKernel->addArg(random.getSize());
randomKernel->addArg((int) random.getSize());
randomKernel->addArg(random);
randomKernel->addArg(randomSeed);
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -55,7 +55,7 @@ public:
* @param name the name of the array
*/
template <class T>
static CudaArray* create(CudaContext& context, int size, const std::string& name) {
static CudaArray* create(CudaContext& context, size_t size, const std::string& name) {
return new CudaArray(context, size, sizeof(T), name);
}
/**
......@@ -71,7 +71,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
CudaArray(CudaContext& context, int size, int elementSize, const std::string& name);
CudaArray(CudaContext& context, size_t size, int elementSize, const std::string& name);
~CudaArray();
/**
* Initialize this object.
......@@ -81,7 +81,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name);
void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -90,13 +90,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) {
void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
void resize(size_t size);
/**
* Get whether this array has been initialized.
*/
......@@ -106,7 +106,7 @@ public:
/**
* Get the number of elements in the array.
*/
int getSize() const {
size_t getSize() const {
return size;
}
/**
......@@ -182,7 +182,8 @@ public:
private:
CudaContext* context;
CUdeviceptr pointer;
int size, elementSize;
size_t size;
int elementSize;
bool ownsMemory;
std::string name;
};
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -338,7 +338,7 @@ private:
CudaArray rebuildNeighborList;
CudaSort* blockSorter;
CUevent downloadCountEvent;
int* pinnedCountBuffer;
unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
......@@ -348,7 +348,8 @@ private:
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
int startTileIndex, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs;
long long numTiles;
std::string kernelSource;
};
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2021 Stanford University and the Authors. *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -36,7 +36,7 @@ using namespace OpenMM;
CudaArray::CudaArray() : pointer(0), ownsMemory(false) {
}
CudaArray::CudaArray(CudaContext& context, int size, int elementSize, const std::string& name) : pointer(0) {
CudaArray::CudaArray(CudaContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name);
}
......@@ -52,7 +52,7 @@ CudaArray::~CudaArray() {
}
}
void CudaArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
void CudaArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0)
throw OpenMMException("CudaArray has already been initialized");
this->context = &dynamic_cast<CudaContext&>(context);
......@@ -69,7 +69,7 @@ void CudaArray::initialize(ComputeContext& context, int size, int elementSize, c
}
}
void CudaArray::resize(int size) {
void CudaArray::resize(size_t size) {
if (pointer == 0)
throw OpenMMException("CudaArray has not been initialized");
if (!ownsMemory)
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -71,7 +71,7 @@ CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(c
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(int), CU_MEMHOSTALLOC_PORTABLE));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(unsigned int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
setKernelSource(CudaKernelSources::nonbonded);
......@@ -430,12 +430,13 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
interactingTiles.resize(maxTiles);
interactingAtoms.resize(CudaContext::TileSize*maxTiles);
interactingAtoms.resize(CudaContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......@@ -444,7 +445,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
findInteractingBlocksArgs[7] = &interactingAtoms.getDevicePointer();
}
if (pinnedCountBuffer[1] > maxSinglePairs) {
maxSinglePairs = (int) (1.2*pinnedCountBuffer[1]);
maxSinglePairs = (unsigned int) (1.2*pinnedCountBuffer[1]);
singlePairs.resize(maxSinglePairs);
if (forceArgs.size() > 0)
forceArgs[19] = &singlePairs.getDevicePointer();
......
......@@ -76,7 +76,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
}
}
__device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsigned int maxSinglePairs, unsigned int* singlePairCount, int2* singlePairs, int* sumBuffer, volatile int& pairStartIndex) {
__device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsigned int maxSinglePairs, unsigned int* singlePairCount, int2* singlePairs, int* sumBuffer, volatile unsigned int& pairStartIndex) {
// Record interactions that should be computed as single pairs rather than in blocks.
const int indexInWarp = threadIdx.x%32;
......@@ -95,7 +95,7 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign
pairStartIndex = atomicAdd(singlePairCount,(unsigned int) sum);
__syncwarp();
int prevSum = __shfl_up_sync(0xffffffff, sum, 1);
int pairIndex = pairStartIndex + (indexInWarp > 0 ? prevSum : 0);
unsigned int pairIndex = pairStartIndex + (indexInWarp > 0 ? prevSum : 0);
for (int i = indexInWarp; i < length; i += 32) {
int count = __popc(flags[i]);
if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count <= maxSinglePairs) {
......@@ -196,14 +196,14 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
__shared__ int workgroupFlagsBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__shared__ int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__shared__ real4 posBuffer[GROUP_SIZE];
__shared__ volatile int workgroupTileIndex[GROUP_SIZE/32];
__shared__ int worksgroupPairStartIndex[GROUP_SIZE/32];
__shared__ volatile unsigned int workgroupTileIndex[GROUP_SIZE/32];
__shared__ unsigned int workgroupPairStartIndex[GROUP_SIZE/32];
int* sumBuffer = (int*) posBuffer; // Reuse the same buffer to save memory
int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
int* flagsBuffer = workgroupFlagsBuffer+BUFFER_SIZE*(warpStart/32);
int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
volatile int& tileStartIndex = workgroupTileIndex[warpStart/32];
volatile int& pairStartIndex = worksgroupPairStartIndex[warpStart/32];
volatile unsigned int& tileStartIndex = workgroupTileIndex[warpStart/32];
volatile unsigned int& pairStartIndex = workgroupPairStartIndex[warpStart/32];
// Loop over blocks.
......@@ -342,11 +342,11 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
#if MAX_BITS_FOR_PAIRS > 0
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif
int tilesToStore = neighborsInBuffer/TILE_SIZE;
unsigned int tilesToStore = neighborsInBuffer/TILE_SIZE;
if (tilesToStore > 0) {
if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex;
unsigned int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
......@@ -369,10 +369,10 @@ extern "C" __global__ __launch_bounds__(GROUP_SIZE,3) void findBlocksWithInterac
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif
if (neighborsInBuffer > 0) {
int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
unsigned int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex;
unsigned int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -59,7 +59,7 @@ public:
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
template <class T>
static OpenCLArray* create(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
static OpenCLArray* create(OpenCLContext& context, size_t size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
return new OpenCLArray(context, size, sizeof(T), name, flags);
}
/**
......@@ -72,7 +72,7 @@ public:
* @param name the name of the array
*/
template <class T>
static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, size_t size, const std::string& name) {
return new OpenCLArray(context, buffer, size, sizeof(T), name);
}
/**
......@@ -89,7 +89,7 @@ public:
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
OpenCLArray(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
/**
* Create an OpenCLArray object that uses a preexisting Buffer.
*
......@@ -99,7 +99,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name);
~OpenCLArray();
/**
* Initialize this array.
......@@ -109,7 +109,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name);
void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object.
*
......@@ -119,7 +119,7 @@ public:
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
void initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags);
void initialize(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags);
/**
* Initialize this object to use a preexisting Buffer.
*
......@@ -129,7 +129,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
void initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -139,7 +139,7 @@ public:
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
template <class T>
void initialize(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
void initialize(OpenCLContext& context, size_t size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
initialize(context, size, sizeof(T), name, flags);
}
/**
......@@ -152,13 +152,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
void initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, const std::string& name) {
initialize(context, buffer, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
void resize(size_t size);
/**
* Get whether this array has been initialized.
*/
......@@ -168,7 +168,7 @@ public:
/**
* Get the size of the array.
*/
int getSize() const {
size_t getSize() const {
return size;
}
/**
......@@ -241,7 +241,8 @@ public:
private:
OpenCLContext* context;
cl::Buffer* buffer;
int size, elementSize;
size_t size;
int elementSize;
cl_int flags;
bool ownsBuffer;
std::string name;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -322,7 +322,7 @@ private:
OpenCLSort* blockSorter;
cl::Event downloadCountEvent;
cl::Buffer* pinnedCountBuffer;
int* pinnedCountMemory;
unsigned int* pinnedCountMemory;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2021 Stanford University and the Authors. *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,11 +35,11 @@ using namespace OpenMM;
OpenCLArray::OpenCLArray() : buffer(NULL), ownsBuffer(false) {
}
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) : buffer(NULL) {
OpenCLArray::OpenCLArray(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags) : buffer(NULL) {
initialize(context, size, elementSize, name, flags);
}
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) : buffer(NULL) {
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name) : buffer(NULL) {
initialize(context, buffer, size, elementSize, name);
}
......@@ -48,11 +48,11 @@ OpenCLArray::~OpenCLArray() {
delete buffer;
}
void OpenCLArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
void OpenCLArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
initialize(dynamic_cast<OpenCLContext&>(context), size, elementSize, name, CL_MEM_READ_WRITE);
}
void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) {
void OpenCLArray::initialize(OpenCLContext& context, size_t size, int elementSize, const std::string& name, cl_int flags) {
if (buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context;
......@@ -71,7 +71,7 @@ void OpenCLArray::initialize(OpenCLContext& context, int size, int elementSize,
}
}
void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) {
void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, size_t size, int elementSize, const std::string& name) {
if (this->buffer != NULL)
throw OpenMMException("OpenCLArray has already been initialized");
this->context = &context;
......@@ -82,7 +82,7 @@ void OpenCLArray::initialize(OpenCLContext& context, cl::Buffer* buffer, int siz
ownsBuffer = false;
}
void OpenCLArray::resize(int size) {
void OpenCLArray::resize(size_t size) {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
if (!ownsBuffer)
......
......@@ -532,9 +532,9 @@ void OpenCLContext::initialize() {
energyParamDerivBuffer.initialize<cl_float>(*this, numEnergyParamDerivs*energyBufferSize, "energyParamDerivBuffer");
addAutoclearBuffer(energyParamDerivBuffer);
}
int bufferBytes = max(max(velm.getSize()*velm.getElementSize(),
int bufferBytes = max(max((int) velm.getSize()*velm.getElementSize(),
energyBufferSize*energyBuffer.getElementSize()),
longForceBuffer.getSize()*longForceBuffer.getElementSize());
(int) longForceBuffer.getSize()*longForceBuffer.getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2020 Stanford University and the Authors. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -89,8 +89,8 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
}
}
pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(int));
pinnedCountMemory = (int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int));
pinnedCountBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, sizeof(unsigned int));
pinnedCountMemory = (unsigned int*) context.getQueue().enqueueMapBuffer(*pinnedCountBuffer, CL_TRUE, CL_MAP_READ, 0, sizeof(int));
setKernelSource(deviceIsCpu ? OpenCLKernelSources::nonbonded_cpu : OpenCLKernelSources::nonbonded);
}
......@@ -395,18 +395,19 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return false;
if (pinnedCountMemory[0] <= (unsigned int) interactingTiles.getSize())
if (pinnedCountMemory[0] <= interactingTiles.getSize())
return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int maxTiles = (int) (1.2*pinnedCountMemory[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
unsigned int maxTiles = (unsigned int) (1.2*pinnedCountMemory[0]);
unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
interactingTiles.resize(maxTiles);
interactingAtoms.resize(OpenCLContext::TileSize*maxTiles);
interactingAtoms.resize(OpenCLContext::TileSize*(size_t) maxTiles);
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
......
......@@ -97,12 +97,12 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
__local int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__local int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__local real3 posBuffer[GROUP_SIZE];
__local volatile int workgroupTileIndex[GROUP_SIZE/32];
__local volatile unsigned int workgroupTileIndex[GROUP_SIZE/32];
__local bool includeBlockFlags[GROUP_SIZE];
__local volatile short2 atomCountBuffer[GROUP_SIZE];
__local int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
__local int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
__local volatile int* tileStartIndex = workgroupTileIndex+(warpStart/32);
__local volatile unsigned int* tileStartIndex = workgroupTileIndex+(warpStart/32);
// Loop over blocks.
......@@ -233,11 +233,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (neighborsInBuffer > BUFFER_SIZE-TILE_SIZE) {
// Store the new tiles to memory.
int tilesToStore = neighborsInBuffer/TILE_SIZE;
unsigned int tilesToStore = neighborsInBuffer/TILE_SIZE;
if (indexInWarp == 0)
*tileStartIndex = atom_add(interactionCount, tilesToStore);
SYNC_WARPS;
int newTileStartIndex = *tileStartIndex;
unsigned int newTileStartIndex = *tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
......@@ -255,11 +255,11 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
// If we have a partially filled buffer, store it to memory.
if (neighborsInBuffer > 0) {
int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
unsigned int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
if (indexInWarp == 0)
*tileStartIndex = atom_add(interactionCount, tilesToStore);
SYNC_WARPS;
int newTileStartIndex = *tileStartIndex;
unsigned int newTileStartIndex = *tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
......
......@@ -1255,7 +1255,7 @@ void CommonCalcAmoebaMultipoleForceKernel::computeInducedField() {
computeInducedFieldKernel->setArg(7, numTileIndices);
if (usePME) {
setPeriodicBoxArgs(cc, computeInducedFieldKernel, 10);
computeInducedFieldKernel->setArg(15, nb.getInteractingTiles().getSize());
computeInducedFieldKernel->setArg(15, (int) nb.getInteractingTiles().getSize());
}
cc.clearBuffer(inducedField);
cc.clearBuffer(inducedFieldPolar);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment