Commit cdc0789a (unverified), authored by peastman and committed by GitHub
Browse files

Fixed range overflow with very large numbers of atoms (#2806)

* Fixed range overflow with very large numbers of atoms

* More fixes to overflow with large numbers of atoms

* Fix test failures
parent b4543a46
...@@ -614,7 +614,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -614,7 +614,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
extern "C" __global__ void computeEDiffForce( extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS,
const float2* __restrict__ dampingAndThole) { const float2* __restrict__ dampingAndThole) {
...@@ -630,7 +630,7 @@ extern "C" __global__ void computeEDiffForce( ...@@ -630,7 +630,7 @@ extern "C" __global__ void computeEDiffForce(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
AtomData4 data; AtomData4 data;
...@@ -785,7 +785,7 @@ extern "C" __global__ void computeEDiffForce( ...@@ -785,7 +785,7 @@ extern "C" __global__ void computeEDiffForce(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -55,7 +55,7 @@ typedef struct { ...@@ -55,7 +55,7 @@ typedef struct {
* Compute the electrostatic field. * Compute the electrostatic field.
*/ */
extern "C" __global__ void computeField(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, extern "C" __global__ void computeField(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ fieldBuffers, const int2* __restrict__ exclusionTiles, unsigned long long* __restrict__ fieldBuffers,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -75,7 +75,7 @@ extern "C" __global__ void computeField(const real4* __restrict__ posq, const un ...@@ -75,7 +75,7 @@ extern "C" __global__ void computeField(const real4* __restrict__ posq, const un
const unsigned int firstExclusionTile = warp*NUM_TILES_WITH_EXCLUSIONS/totalWarps; const unsigned int firstExclusionTile = warp*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
const unsigned int lastExclusionTile = (warp+1)*NUM_TILES_WITH_EXCLUSIONS/totalWarps; const unsigned int lastExclusionTile = (warp+1)*NUM_TILES_WITH_EXCLUSIONS/totalWarps;
for (int tile = firstExclusionTile; tile < lastExclusionTile; tile++) { for (int tile = firstExclusionTile; tile < lastExclusionTile; tile++) {
const ushort2 tileIndices = exclusionTiles[tile]; const int2 tileIndices = exclusionTiles[tile];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
real3 field = make_real3(0); real3 field = make_real3(0);
...@@ -218,7 +218,7 @@ extern "C" __global__ void computeField(const real4* __restrict__ posq, const un ...@@ -218,7 +218,7 @@ extern "C" __global__ void computeField(const real4* __restrict__ posq, const un
while (skipTiles[tbx+TILE_SIZE-1] < tile) { while (skipTiles[tbx+TILE_SIZE-1] < tile) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -45,7 +45,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) { ...@@ -45,7 +45,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
extern "C" __global__ void computeNonbonded( extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions, unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -68,7 +68,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -68,7 +68,7 @@ extern "C" __global__ void computeNonbonded(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
real3 force = make_real3(0); real3 force = make_real3(0);
...@@ -269,7 +269,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -269,7 +269,7 @@ extern "C" __global__ void computeNonbonded(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -376,7 +376,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has ...@@ -376,7 +376,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
extern "C" __global__ void computeElectrostatics( extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -396,7 +396,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -396,7 +396,7 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
AtomData data; AtomData data;
...@@ -523,7 +523,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -523,7 +523,7 @@ extern "C" __global__ void computeElectrostatics(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -437,7 +437,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -437,7 +437,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
*/ */
extern "C" __global__ void computeFixedField( extern "C" __global__ void computeFixedField(
unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq, unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const int2* __restrict__ exclusionTiles,
unsigned int startTileIndex, unsigned int numTileIndices, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
...@@ -458,7 +458,7 @@ extern "C" __global__ void computeFixedField( ...@@ -458,7 +458,7 @@ extern "C" __global__ void computeFixedField(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
AtomData data; AtomData data;
...@@ -627,7 +627,7 @@ extern "C" __global__ void computeFixedField( ...@@ -627,7 +627,7 @@ extern "C" __global__ void computeFixedField(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -357,7 +357,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -357,7 +357,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
* Compute the mutual induced field. * Compute the mutual induced field.
*/ */
extern "C" __global__ void computeInducedField( extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const int2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef EXTRAPOLATED_POLARIZATION #ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar, unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
...@@ -384,7 +384,7 @@ extern "C" __global__ void computeInducedField( ...@@ -384,7 +384,7 @@ extern "C" __global__ void computeInducedField(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
AtomData data; AtomData data;
...@@ -490,7 +490,7 @@ extern "C" __global__ void computeInducedField( ...@@ -490,7 +490,7 @@ extern "C" __global__ void computeInducedField(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -441,7 +441,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, mixed& energy) { ...@@ -441,7 +441,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, mixed& energy) {
extern "C" __global__ void computeElectrostatics( extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
...@@ -461,7 +461,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -461,7 +461,7 @@ extern "C" __global__ void computeElectrostatics(
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps; const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) { for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const ushort2 tileIndices = exclusionTiles[pos]; const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x; const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y; const unsigned int y = tileIndices.y;
AtomData data; AtomData data;
...@@ -590,7 +590,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -590,7 +590,7 @@ extern "C" __global__ void computeElectrostatics(
while (skipTiles[tbx+TILE_SIZE-1] < pos) { while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx]; int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
} }
else else
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2018 Stanford University and the Authors. * * Portions copyright (c) 2008-2020 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -536,6 +536,63 @@ void testLargeSystem() { ...@@ -536,6 +536,63 @@ void testLargeSystem() {
ASSERT_EQUAL_TOL(state.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(state.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
} }
void testHugeSystem() {
// Create a system with over 3 million particles.
const int gridSize = 150;
const int numParticles = gridSize*gridSize*gridSize;
const double spacing = 0.3;
const double boxSize = gridSize*spacing;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
NonbondedForce* force = new NonbondedForce();
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
force->setCutoffDistance(1.0);
force->setUseSwitchingFunction(true);
force->setSwitchingDistance(0.9);
vector<Vec3> positions;
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < gridSize; i++)
for (int j = 0; j < gridSize; j++)
for (int k = 0; k < gridSize; k++) {
system.addParticle(1.0);
force->addParticle(0.0, 0.1, 1.0);
positions.push_back(Vec3(i*spacing+genrand_real2(sfmt)*0.1, j*spacing+genrand_real2(sfmt)*0.1, k*spacing+genrand_real2(sfmt)*0.1));
}
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Compute the norm of the force.
State state = context.getState(State::Forces);
double norm = 0.0;
for (int i = 0; i < numParticles; ++i) {
Vec3 f = state.getForces()[i];
norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
}
norm = std::sqrt(norm);
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
const double delta = 0.3;
double step = 0.5*delta/norm;
vector<Vec3> positions2(numParticles), positions3(numParticles);
for (int i = 0; i < numParticles; ++i) {
Vec3 p = positions[i];
Vec3 f = state.getForces()[i];
positions2[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
positions3[i] = Vec3(p[0]+f[0]*step, p[1]+f[1]*step, p[2]+f[2]*step);
}
context.setPositions(positions2);
State state2 = context.getState(State::Energy);
context.setPositions(positions3);
State state3 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(state2.getPotentialEnergy(), state3.getPotentialEnergy()+norm*delta, 1e-5)
}
void testDispersionCorrection() { void testDispersionCorrection() {
// Create a box full of identical particles. // Create a box full of identical particles.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment