Unverified Commit 89d2ff0e authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Add hipification of CUDA platform

Port changes in CUDA backend to HIP

Fix a warning about arithmetic operations on void* in HipArray::uploadSubArray

Fix "Error Initializing context ROCm 5.3.0"

    https://github.com/StreamHPC/openmm-hip/issues/3


    hipDeviceSetCacheConfig returns hipErrorNotSupported on 5.3
Co-authored-by: default avatarNick Curtis <nicholas.curtis@amd.com>
parent 8defca2d
#ifndef ENABLE_SHUFFLE
typedef struct {
real x, y, z;
real q;
real fx, fy, fz;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real padding;
#endif
} AtomData;
#endif
#ifdef ENABLE_SHUFFLE
#define real_shfl SHFL
#endif
/**
* Compute nonbonded interactions. The kernel is separated into two parts,
* tiles with exclusions and tiles without exclusions. It relies heavily on
* implicit warp-level synchronization. A tile is defined by two atom blocks
* each of warpsize. Each warp computes a range of tiles.
*
* Tiles with exclusions compute the entire set of interactions across
* atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
* the forces are computed and accumulated diagonally in the manner shown below
* where, suppose
*
* [a-h] comprise atom block 1, [i-p] comprise atom block 2
*
* 1 denotes the first set of calculations within the warp
* 2 denotes the second set of calculations within the warp
* ... etc.
*
* threads
* 0 1 2 3 4 5 6 7
* atom1
* L a b c d e f g h
* o i 1 2 3 4 5 6 7 8
* c j 8 1 2 3 4 5 6 7
* a k 7 8 1 2 3 4 5 6
* l l 6 7 8 1 2 3 4 5
* D m 5 6 7 8 1 2 3 4
* a n 4 5 6 7 8 1 2 3
* t o 3 4 5 6 7 8 1 2
* a p 2 3 4 5 6 7 8 1
*
* Tiles without exclusions read off directly from the neighbourlist interactingAtoms
* and follows the same force accumulation method. If more there are more interactingTiles
* than the size of the neighbourlist initially allocated, the neighbourlist is rebuilt
* and the full tileset is computed. This should happen on the first step, and very rarely
* afterwards.
*
* On diagonal exclusion tiles use __shfl to broadcast. For all other types of tiles __shfl
* is used to pass around the forces, positions, and parameters when computing the forces.
*
* [out]forceBuffers - forces on each atom to eventually be accumulated
* [out]energyBuffer - energyBuffer to eventually be accumulated
* [in]posq - x,y,z,charge
* [in]exclusions - 1024-bit flags denoting atom-atom exclusions for each tile
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing
* [in]int tiles - the atom block for each tile
* [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded
* [in]periodicBoxSize - size of the Periodic Box, last dimension (w) not used
* [in]invPeriodicBox - inverse of the periodicBoxSize, pre-computed for speed
* [in]blockCenter - the center of each block in euclidean coordinates
* [in]blockSize - size of the each block, radiating from the center
* - x is half the distance of total length
* - y is half the distance of total width
* - z is half the distance of total height
* - w is not used
* [in]interactingAtoms - a list of interactions within a given tile
*
*/
extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned long long numTileIndices
#ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxSinglePairs,
const int2* __restrict__ singlePairs
#endif
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
mixed energy = 0;
INIT_DERIVATIVES
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
#endif
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y;
real3 force = make_real3(0);
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
tileflags excl = exclusions[pos*TILE_SIZE+tgx];
#endif
const bool hasExclusions = true;
if (x == y) {
// This tile is on the diagonal.
#ifdef ENABLE_SHUFFLE
real4 shflPosq = posq1;
#else
localData[threadIdx.x].x = posq1.x;
localData[threadIdx.x].y = posq1.y;
localData[threadIdx.x].z = posq1.z;
localData[threadIdx.x].q = posq1.w;
LOAD_LOCAL_PARAMETERS_FROM_1
#endif
// we do not need to fetch parameters from global since this is a symmetric tile
// instead we can broadcast the values using shuffle
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j;
real4 posq2;
#ifdef ENABLE_SHUFFLE
BROADCAST_WARP_DATA
#else
posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 0.5f;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.x -= delta.x*dEdR;
force.y -= delta.y*dEdR;
force.z -= delta.z*dEdR;
#else
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
real4 shflPosq = posq[j];
#ifdef ENABLE_SHUFFLE
real3 shflForce;
shflForce.x = 0.0f;
shflForce.y = 0.0f;
shflForce.z = 0.0f;
#else
localData[threadIdx.x].x = shflPosq.x;
localData[threadIdx.x].y = shflPosq.y;
localData[threadIdx.x].z = shflPosq.z;
localData[threadIdx.x].q = shflPosq.w;
localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f;
localData[threadIdx.x].fz = 0.0f;
#endif
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
#ifdef USE_EXCLUSIONS
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq;
#else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else
localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z;
#endif
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#else
localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
tj = (tj + 1) & (TILE_SIZE - 1);
}
const unsigned int offset = y*TILE_SIZE + tgx;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
#else
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(long long)numTiles/totalWarps);
int end = (int) ((warp+1)*(long long)numTiles/totalWarps);
#else
int pos = (int) (startTileIndex+warp*numTileIndices/totalWarps);
int end = (int) (startTileIndex+(warp+1)*numTileIndices/totalWarps);
#endif
int skipBase = 0;
int currentSkipIndex = tbx;
// atomIndices can probably be shuffled as well
// but it probably wouldn't make things any faster
__shared__ int atomIndices[THREAD_BLOCK_SIZE];
__shared__ volatile int skipTiles[THREAD_BLOCK_SIZE];
skipTiles[threadIdx.x] = -1;
while (pos < end) {
const bool hasExclusions = false;
real3 force = make_real3(0);
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed.
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
int2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[threadIdx.x] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
// Load atom data for this tile.
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
//const unsigned int localAtomIndex = threadIdx.x;
#ifdef USE_CUTOFF
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
#else
unsigned int j = y*TILE_SIZE + tgx;
#endif
atomIndices[threadIdx.x] = j;
#ifdef ENABLE_SHUFFLE
DECLARE_LOCAL_PARAMETERS
real4 shflPosq;
real3 shflForce;
shflForce.x = 0.0f;
shflForce.y = 0.0f;
shflForce.z = 0.0f;
#endif
if (j < PADDED_NUM_ATOMS) {
// Load position of atom j from from global memory
#ifdef ENABLE_SHUFFLE
shflPosq = posq[j];
#else
localData[threadIdx.x].x = posq[j].x;
localData[threadIdx.x].y = posq[j].y;
localData[threadIdx.x].z = posq[j].z;
localData[threadIdx.x].q = posq[j].w;
localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f;
localData[threadIdx.x].fz = 0.0f;
#endif
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
else {
#ifdef ENABLE_SHUFFLE
shflPosq = make_real4(0, 0, 0, 0);
#else
localData[threadIdx.x].x = 0;
localData[threadIdx.x].y = 0;
localData[threadIdx.x].z = 0;
#endif
CLEAR_LOCAL_PARAMETERS
}
#ifdef USE_PERIODIC
if (singlePeriodicCopy) {
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
#ifdef ENABLE_SHUFFLE
APPLY_PERIODIC_TO_POS_WITH_CENTER(shflPosq, blockCenterX)
#else
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
#endif
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq;
#else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else
localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z;
#endif
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#else
localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
else
#endif
{
// We need to apply periodic boundary conditions separately for each interaction.
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
#ifdef ENABLE_SHUFFLE
real4 posq2 = shflPosq;
#else
real4 posq2 = make_real4(localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else
localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fy += delta.y;
localData[tbx+tj].fz += delta.z;
#endif
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#ifdef ENABLE_SHUFFLE
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#else
localData[tbx+tj].fx += dEdR2.x;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[threadIdx.x];
#else
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.z*0x100000000)));
#else
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
}
#endif
}
pos++;
}
// Third loop: single pairs that aren't part of a tile.
#if USE_CUTOFF
const unsigned int numPairs = interactionCount[1];
if (numPairs > maxSinglePairs)
return; // There wasn't enough memory for the neighbor list.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numPairs; i += blockDim.x*gridDim.x) {
int2 pair = singlePairs[i];
int atom1 = pair.x;
int atom2 = pair.y;
real4 posq1 = posq[atom1];
real4 posq2 = posq[atom2];
LOAD_ATOM1_PARAMETERS
int j = atom2;
atom2 = threadIdx.x;
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_ATOM2_PARAMETERS
atom2 = pair.y;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
bool hasExclusions = false;
bool isExcluded = false;
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
real3 dEdR1 = delta*dEdR;
real3 dEdR2 = -dEdR1;
#endif
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (-dEdR1.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR1.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR1.z*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (-dEdR2.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR2.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR2.z*0x100000000)));
#endif
}
#endif
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
SAVE_DERIVATIVES
}
/**
* Sum the forces computed by different contexts.
*/
extern "C" __global__ void sumForces(long long* __restrict__ force, long long* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
long long sum = force[index];
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
force[index] = sum;
}
}
__device__ KEY_TYPE getValue(DATA_TYPE value) {
return SORT_KEY;
}
extern "C" {
/**
* Sort a list that is short enough to entirely fit in local memory. This is executed as
* a single thread block.
*/
__global__ void sortShortList(DATA_TYPE* __restrict__ data, unsigned int length) {
// Load the data into local memory.
HIP_DYNAMIC_SHARED( DATA_TYPE, dataBuffer)
for (int index = threadIdx.x; index < length; index += blockDim.x)
dataBuffer[index] = data[index];
__syncthreads();
// Perform a bitonic sort in local memory.
for (unsigned int k = 2; k < 2*length; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x) {
int ixj = i^j;
if (ixj > i && ixj < length) {
DATA_TYPE value1 = dataBuffer[i];
DATA_TYPE value2 = dataBuffer[ixj];
bool ascending = ((i&k) == 0);
for (unsigned int mask = k*2; mask < 2*length; mask *= 2)
ascending = ((i&mask) == 0 ? !ascending : ascending);
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
dataBuffer[i] = value2;
dataBuffer[ixj] = value1;
}
}
}
__syncthreads();
}
}
// Write the data back to global memory.
for (int index = threadIdx.x; index < length; index += blockDim.x)
data[index] = dataBuffer[index];
}
/**
* An alternate kernel for sorting short lists. In this version every thread does a full
* scan through the data to select the destination for one element. This involves more
* work, but also parallelizes much better.
*/
__global__ void sortShortList2(const DATA_TYPE* __restrict__ dataIn, DATA_TYPE* __restrict__ dataOut, unsigned int length) {
__shared__ DATA_TYPE dataBuffer[64];
int globalId = blockDim.x*blockIdx.x+threadIdx.x;
DATA_TYPE value = dataIn[globalId < length ? globalId : 0];
KEY_TYPE key = getValue(value);
int count = 0;
for (int blockStart = 0; blockStart < length; blockStart += blockDim.x) {
int numInBlock = min(static_cast<int>(blockDim.x), static_cast<int>(length-blockStart));
__syncthreads();
if (threadIdx.x < numInBlock)
dataBuffer[threadIdx.x] = dataIn[blockStart+threadIdx.x];
__syncthreads();
for (int i = 0; i < numInBlock; i++) {
KEY_TYPE otherKey = getValue(dataBuffer[i]);
if (otherKey < key || (otherKey == key && blockStart+i < globalId))
count++;
}
}
if (globalId < length)
dataOut[count] = value;
}
/**
* Calculate the minimum and maximum value in the array to be sorted. This kernel
* is executed as a single work group.
*/
__global__ void computeRange(const DATA_TYPE* __restrict__ data, unsigned int length, KEY_TYPE* __restrict__ range,
unsigned int numBuckets, unsigned int* __restrict__ bucketOffset) {
HIP_DYNAMIC_SHARED( KEY_TYPE, minBuffer)
KEY_TYPE* maxBuffer = minBuffer+blockDim.x;
KEY_TYPE minimum = MAX_KEY;
KEY_TYPE maximum = MIN_KEY;
// Each thread calculates the range of a subset of values.
for (unsigned int index = threadIdx.x; index < length; index += blockDim.x) {
KEY_TYPE value = getValue(data[index]);
minimum = min(minimum, value);
maximum = max(maximum, value);
}
// Now reduce them.
minBuffer[threadIdx.x] = minimum;
maxBuffer[threadIdx.x] = maximum;
__syncthreads();
for (unsigned int step = 1; step < blockDim.x; step *= 2) {
if (threadIdx.x+step < blockDim.x && threadIdx.x%(2*step) == 0) {
minBuffer[threadIdx.x] = min(minBuffer[threadIdx.x], minBuffer[threadIdx.x+step]);
maxBuffer[threadIdx.x] = max(maxBuffer[threadIdx.x], maxBuffer[threadIdx.x+step]);
}
__syncthreads();
}
minimum = minBuffer[0];
maximum = maxBuffer[0];
if (threadIdx.x == 0) {
range[0] = minimum;
range[1] = maximum;
}
// Clear the bucket counters in preparation for the next kernel.
for (unsigned int index = threadIdx.x; index < numBuckets; index += blockDim.x)
bucketOffset[index] = 0;
}
/**
* Assign elements to buckets.
*/
__global__ void assignElementsToBuckets(const DATA_TYPE* __restrict__ data, unsigned int length, unsigned int numBuckets, const KEY_TYPE* __restrict__ range,
unsigned int* __restrict__ bucketOffset, unsigned int* __restrict__ bucketOfElement, unsigned int* __restrict__ offsetInBucket) {
float minValue = (float) (range[0]);
float maxValue = (float) (range[1]);
float bucketWidth = (maxValue-minValue)/numBuckets;
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
float key = (float) getValue(data[index]);
unsigned int bucketIndex = min((unsigned int) ((key-minValue)/bucketWidth), numBuckets-1);
offsetInBucket[index] = atomicAdd(&bucketOffset[bucketIndex], 1);
bucketOfElement[index] = bucketIndex;
}
}
/**
* Sum the bucket sizes to compute the start position of each bucket. This kernel
* is executed as a single work group.
*/
__global__ void computeBucketPositions(unsigned int numBuckets, unsigned int* __restrict__ bucketOffset) {
HIP_DYNAMIC_SHARED( unsigned int, posBuffer)
unsigned int globalOffset = 0;
for (unsigned int startBucket = 0; startBucket < numBuckets; startBucket += blockDim.x) {
// Load the bucket sizes into local memory.
unsigned int globalIndex = startBucket+threadIdx.x;
__syncthreads();
posBuffer[threadIdx.x] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0);
__syncthreads();
// Perform a parallel prefix sum.
for (unsigned int step = 1; step < blockDim.x; step *= 2) {
unsigned int add = (threadIdx.x >= step ? posBuffer[threadIdx.x-step] : 0);
__syncthreads();
posBuffer[threadIdx.x] += add;
__syncthreads();
}
// Write the results back to global memory.
if (globalIndex < numBuckets)
bucketOffset[globalIndex] = posBuffer[threadIdx.x]+globalOffset;
globalOffset += posBuffer[blockDim.x-1];
}
}
/**
* Copy the input data into the buckets for sorting.
*/
__global__ void copyDataToBuckets(const DATA_TYPE* __restrict__ data, DATA_TYPE* __restrict__ buckets, unsigned int length, const unsigned int* __restrict__ bucketOffset, const unsigned int* __restrict__ bucketOfElement, const unsigned int* __restrict__ offsetInBucket) {
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
DATA_TYPE element = data[index];
unsigned int bucketIndex = bucketOfElement[index];
unsigned int offset = (bucketIndex == 0 ? 0 : bucketOffset[bucketIndex-1]);
buckets[offset+offsetInBucket[index]] = element;
}
}
/**
* Sort the data in each bucket.
*/
__global__ void sortBuckets(DATA_TYPE* __restrict__ data, const DATA_TYPE* __restrict__ buckets, unsigned int numBuckets, const unsigned int* __restrict__ bucketOffset) {
HIP_DYNAMIC_SHARED( DATA_TYPE, dataBuffer)
for (unsigned int index = blockIdx.x; index < numBuckets; index += gridDim.x) {
unsigned int startIndex = (index == 0 ? 0 : bucketOffset[index-1]);
unsigned int endIndex = bucketOffset[index];
unsigned int length = endIndex-startIndex;
if (length <= blockDim.x) {
// Load the data into local memory.
if (threadIdx.x < length)
dataBuffer[threadIdx.x] = buckets[startIndex+threadIdx.x];
else
dataBuffer[threadIdx.x] = MAX_VALUE;
__syncthreads();
// Perform a bitonic sort in local memory.
for (unsigned int k = 2; k <= blockDim.x; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
int ixj = threadIdx.x^j;
if (ixj > threadIdx.x) {
DATA_TYPE value1 = dataBuffer[threadIdx.x];
DATA_TYPE value2 = dataBuffer[ixj];
bool ascending = (threadIdx.x&k) == 0;
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
dataBuffer[threadIdx.x] = value2;
dataBuffer[ixj] = value1;
}
}
__syncthreads();
}
}
// Write the data to the sorted array.
if (threadIdx.x < length)
data[startIndex+threadIdx.x] = dataBuffer[threadIdx.x];
}
else {
// Copy the bucket data over to the output array.
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x)
data[startIndex+i] = buckets[startIndex+i];
__threadfence_block();
__syncthreads();
// Perform a bitonic sort in global memory.
for (unsigned int k = 2; k < 2*length; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x) {
int ixj = i^j;
if (ixj > i && ixj < length) {
DATA_TYPE value1 = data[startIndex+i];
DATA_TYPE value2 = data[startIndex+ixj];
bool ascending = ((i&k) == 0);
for (unsigned int mask = k*2; mask < 2*length; mask *= 2)
ascending = ((i&mask) == 0 ? !ascending : ascending);
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
data[startIndex+i] = value2;
data[startIndex+ixj] = value1;
}
}
}
__threadfence_block();
__syncthreads();
}
}
}
}
}
}
extern "C" {
/**
* This is called by the various functions below to clear a buffer.
*/
__device__ void clearSingleBuffer(int* __restrict__ buffer, int size) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int4* buffer4 = (int4*) buffer;
int sizeDiv4 = size/4;
while (index < sizeDiv4) {
buffer4[index] = make_int4(0);
index += blockDim.x*gridDim.x;
}
if (blockDim.x*blockIdx.x+threadIdx.x == 0)
for (int i = sizeDiv4*4; i < size; i++)
buffer[i] = 0;
}
/**
* Fill a buffer with 0.
*/
__global__ void clearBuffer(int* __restrict__ buffer, int size) {
clearSingleBuffer(buffer, size);
}
/**
* Fill two buffers with 0.
*/
__global__ void clearTwoBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
}
/**
* Fill three buffers with 0.
*/
__global__ void clearThreeBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
}
/**
* Fill four buffers with 0.
*/
__global__ void clearFourBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
}
/**
* Fill five buffers with 0.
*/
__global__ void clearFiveBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
}
/**
* Fill six buffers with 0.
*/
__global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5, int* __restrict__ buffer6, int size6) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
clearSingleBuffer(buffer6, size6);
}
/**
* Sum the energy buffer.
*/
__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
HIP_DYNAMIC_SHARED( mixed, tempBuffer)
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize)
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
*result = tempBuffer[0];
}
/**
* Record the atomic charges into the posq array.
*/
__global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq, int* __restrict__ atomOrder, int numAtoms) {
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]];
}
}
/**
* This file defines vector operations to simplify code elsewhere.
*/
// Versions of make_x() that take a single value and set all components to that.
inline __device__ int2 make_int2(int a) {
return make_int2(a, a);
}
inline __device__ int3 make_int3(int a) {
return make_int3(a, a, a);
}
inline __device__ int4 make_int4(int a) {
return make_int4(a, a, a, a);
}
inline __device__ float2 make_float2(float a) {
return make_float2(a, a);
}
inline __device__ float3 make_float3(float a) {
return make_float3(a, a, a);
}
inline __device__ float4 make_float4(float a) {
return make_float4(a, a, a, a);
}
inline __device__ double2 make_double2(double a) {
return make_double2(a, a);
}
inline __device__ double3 make_double3(double a) {
return make_double3(a, a, a);
}
inline __device__ double4 make_double4(double a) {
return make_double4(a, a, a, a);
}
// Multiply a vector by a constant.
inline __device__ int2 operator*(int2 a, int b) {
return make_int2(a.x*b, a.y*b);
}
inline __device__ int3 operator*(int3 a, int b) {
return make_int3(a.x*b, a.y*b, a.z*b);
}
inline __device__ int4 operator*(int4 a, int b) {
return make_int4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ int2 operator*(int a, int2 b) {
return make_int2(a*b.x, a*b.y);
}
inline __device__ int3 operator*(int a, int3 b) {
return make_int3(a*b.x, a*b.y, a*b.z);
}
inline __device__ int4 operator*(int a, int4 b) {
return make_int4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ float2 operator*(float2 a, float b) {
return make_float2(a.x*b, a.y*b);
}
inline __device__ float3 operator*(float3 a, float b) {
return make_float3(a.x*b, a.y*b, a.z*b);
}
inline __device__ float4 operator*(float4 a, float b) {
return make_float4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ float2 operator*(float a, float2 b) {
return make_float2(a*b.x, a*b.y);
}
inline __device__ float3 operator*(float a, float3 b) {
return make_float3(a*b.x, a*b.y, a*b.z);
}
inline __device__ float4 operator*(float a, float4 b) {
return make_float4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ double2 operator*(double2 a, double b) {
return make_double2(a.x*b, a.y*b);
}
inline __device__ double3 operator*(double3 a, double b) {
return make_double3(a.x*b, a.y*b, a.z*b);
}
inline __device__ double4 operator*(double4 a, double b) {
return make_double4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ double2 operator*(double a, double2 b) {
return make_double2(a*b.x, a*b.y);
}
inline __device__ double3 operator*(double a, double3 b) {
return make_double3(a*b.x, a*b.y, a*b.z);
}
inline __device__ double4 operator*(double a, double4 b) {
return make_double4(a*b.x, a*b.y, a*b.z, a*b.w);
}
// Divide a vector by a constant.
inline __device__ int2 operator/(int2 a, int b) {
return make_int2(a.x/b, a.y/b);
}
inline __device__ int3 operator/(int3 a, int b) {
return make_int3(a.x/b, a.y/b, a.z/b);
}
inline __device__ int4 operator/(int4 a, int b) {
return make_int4(a.x/b, a.y/b, a.z/b, a.w/b);
}
inline __device__ float2 operator/(float2 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ float3 operator/(float3 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ float4 operator/(float4 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ double2 operator/(double2 a, double b) {
double scale = 1.0/b;
return a*scale;
}
inline __device__ double3 operator/(double3 a, double b) {
double scale = 1.0/b;
return a*scale;
}
inline __device__ double4 operator/(double4 a, double b) {
double scale = 1.0/b;
return a*scale;
}
// *= operator (multiply vector by constant)
inline __device__ void operator*=(int2& a, int b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(int3& a, int b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(int4& a, int b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(float2& a, float b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(float3& a, float b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(float4& a, float b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(double2& a, double b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(double3& a, double b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(double4& a, double b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
// Dot product
inline __device__ float dot(float3 a, float3 b) {
return a.x*b.x+a.y*b.y+a.z*b.z;
}
inline __device__ double dot(double3 a, double3 b) {
return a.x*b.x+a.y*b.y+a.z*b.z;
}
// Cross product
inline __device__ float3 cross(float3 a, float3 b) {
return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
}
inline __device__ float4 cross(float4 a, float4 b) {
return make_float4(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x, 0.0f);
}
inline __device__ double3 cross(double3 a, double3 b) {
return make_double3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
}
inline __device__ double4 cross(double4 a, double4 b) {
return make_double4(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x, 0.0);
}
// Normalize a vector
inline __device__ float2 normalize(float2 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y);
}
inline __device__ float3 normalize(float3 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
}
inline __device__ float4 normalize(float4 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y+a.z*a.z+a.w*a.w);
}
inline __device__ double2 normalize(double2 a) {
return a*rsqrt(a.x*a.x+a.y*a.y);
}
inline __device__ double3 normalize(double3 a) {
return a*rsqrt(a.x*a.x+a.y*a.y+a.z*a.z);
}
inline __device__ double4 normalize(double4 a) {
return a*rsqrt(a.x*a.x+a.y*a.y+a.z*a.z+a.w*a.w);
}
// Strip off the fourth component of a vector.
inline __device__ short3 trimTo3(short4 v) {
return make_short3(v.x, v.y, v.z);
}
inline __device__ int3 trimTo3(int4 v) {
return make_int3(v.x, v.y, v.z);
}
inline __device__ float3 trimTo3(float4 v) {
return make_float3(v.x, v.y, v.z);
}
inline __device__ double3 trimTo3(double4 v) {
return make_double3(v.x, v.y, v.z);
}
#
# Include HIP related files.
#
# add include / link dirs
INCLUDE_DIRECTORIES(${MMHIP_INCLUDE_DIRS})
LINK_DIRECTORIES(${MMHIP_LINK_DIRS})
FILE(GLOB HIP_KERNELS ${KERNEL_SOURCE_DIR}/kernels/*.hip)
ADD_CUSTOM_COMMAND(OUTPUT ${KERNELS_CPP} ${KERNELS_H}
COMMAND ${CMAKE_COMMAND}
ARGS -D KERNEL_SOURCE_DIR=${KERNEL_SOURCE_DIR} -D KERNELS_CPP=${KERNELS_CPP} -D KERNELS_H=${KERNELS_H} -D KERNEL_SOURCE_CLASS=${KERNEL_SOURCE_CLASS} -D KERNEL_FILE_EXTENSION=hip -P ${CMAKE_SOURCE_DIR}/cmake_modules/EncodeKernelFiles.cmake
DEPENDS ${HIP_KERNELS}
)
# set HIP compile flags
SET_SOURCE_FILES_PROPERTIES(${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "${HIPCXXFLAGS}")
SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} PROPERTIES GENERATED TRUE)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME} ${MMHIP_LIBS} ${PTHREADS_LIB_STATIC})
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_COMMON_BUILDING_STATIC_LIBRARY")
IF (APPLE)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework HIP")
ELSE (APPLE)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}")
ENDIF (APPLE)
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
#
# Testing
#
ENABLE_TESTING()
# add include / link dirs
INCLUDE_DIRECTORIES(${MMHIP_INCLUDE_DIRS})
LINK_DIRECTORIES(${MMHIP_LINK_DIRS})
SET(OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS TRUE CACHE BOOL "Whether to build double precision versions of HIP test cases")
SET( INCLUDE_SERIALIZATION FALSE )
#SET( INCLUDE_SERIALIZATION TRUE )
IF( INCLUDE_SERIALIZATION )
INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include)
SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" )
ENDIF( INCLUDE_SERIALIZATION )
# Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp")
FOREACH(TEST_PROG ${TEST_PROGS})
GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
# Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
IF (APPLE)
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework HIP" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${HIPCXXFLAGS}")
ELSE (APPLE)
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${HIPCXXFLAGS}")
ENDIF (APPLE)
ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
IF (OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS)
ADD_TEST(${TEST_ROOT}Mixed ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} mixed)
ADD_TEST(${TEST_ROOT}Double ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} double)
ENDIF(OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS)
ENDFOREACH(TEST_PROG ${TEST_PROGS})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015-2016 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "HipPlatform.h"
#include <string>
OpenMM::HipPlatform platform;
void initializeTests(int argc, char* argv[]) {
if (argc > 1)
platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestAndersenThermostat.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestBrownianIntegrator.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCMAPTorsionForce.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCMMotionRemover.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCheckpoints.h"
void testCheckpoint() {
const int numParticles = 100;
const double boxSize = 5.0;
const double temperature = 200.0;
System system;
system.addForce(new AndersenThermostat(0.0, 100.0));
NonbondedForce* nonbonded = new NonbondedForce();
system.addForce(nonbonded);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(i%2 == 0 ? 0.1 : -0.1, 0.2, 0.1);
bool clash;
do {
clash = false;
positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
for (int j = 0; j < i; j++) {
Vec3 delta = positions[i]-positions[j];
if (sqrt(delta.dot(delta)) < 0.1)
clash = true;
}
} while (clash);
}
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature);
// Run for a little while.
integrator.step(100);
// Record the current state and make a checkpoint.
State s1 = context.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream1(ios_base::out | ios_base::in | ios_base::binary);
context.createCheckpoint(stream1);
// Continue the simulation for a few more steps and record the state again.
integrator.step(10);
State s2 = context.getState(State::Positions | State::Velocities | State::Parameters);
// Restore from the checkpoint and see if everything gets restored correctly.
context.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context.setParameter(AndersenThermostat::Temperature(), temperature+10);
context.loadCheckpoint(stream1);
State s3 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s1, s3);
// Now simulate from there and see if the trajectory is identical.
integrator.step(10);
State s4 = context.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s2, s4);
// Create a new Context that uses multiple devices.
string deviceIndex = platform.getPropertyValue(context, HipPlatform::HipDeviceIndex());
map<string, string> props;
props[HipPlatform::HipDeviceIndex()] = deviceIndex+","+deviceIndex;
VerletIntegrator integrator2(0.001);
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
context2.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature);
// Now repeat all of the above tests with it.
integrator2.step(100);
State s5 = context2.getState(State::Positions | State::Velocities | State::Parameters);
stringstream stream2(ios_base::out | ios_base::in | ios_base::binary);
context2.createCheckpoint(stream2);
integrator2.step(10);
State s6 = context2.getState(State::Positions | State::Velocities | State::Parameters);
context2.setPeriodicBoxVectors(Vec3(2*boxSize, 0, 0), Vec3(0, 2*boxSize, 0), Vec3(0, 0, 2*boxSize));
context2.setParameter(AndersenThermostat::Temperature(), temperature+10);
context2.loadCheckpoint(stream2);
State s7 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s5, s7);
integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
}
void runPlatformTests() {
testCheckpoint();
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCompoundIntegrator.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomAngleForce.h"
void testParallelComputation() {
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomAngleForce* force = new CustomAngleForce("(theta-1.1)^2");
vector<double> params;
for (int i = 2; i < numParticles; i++)
force->addAngle(i-2, i-1, i, params);
system.addForce(force);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(i, i%2, 0);
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, HipPlatform::HipDeviceIndex());
map<string, string> props;
props[HipPlatform::HipDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
void runPlatformTests() {
testParallelComputation();
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomBondForce.h"
void testParallelComputation() {
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomBondForce* force = new CustomBondForce(("(r-1.1)^2"));
vector<double> params;
for (int i = 1; i < numParticles; i++)
force->addBond(i-1, i, params);
system.addForce(force);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(i, 0, 0);
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, HipPlatform::HipDeviceIndex());
map<string, string> props;
props[HipPlatform::HipDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
void runPlatformTests() {
testParallelComputation();
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomCVForce.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomCentroidBondForce.h"
void runPlatformTests() {
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomCompoundBondForce.h"
void testParallelComputation() {
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomCompoundBondForce* force = new CustomCompoundBondForce(2, ("(distance(p1,p2)-1.1)^2"));
vector<int> particles(2);
vector<double> params;
for (int i = 1; i < numParticles; i++) {
particles[0] = i-1;
particles[1] = i;
force->addBond(particles, params);
}
system.addForce(force);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(i, 0, 0);
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, HipPlatform::HipDeviceIndex());
map<string, string> props;
props[HipPlatform::HipDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
void runPlatformTests() {
testParallelComputation();
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomExternalForce.h"
#include "sfmt/SFMT.h"
void testParallelComputation() {
System system;
const int numParticles = 200;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomExternalForce* force = new CustomExternalForce("x^2+y^2+z^2");
vector<double> params;
for (int i = 0; i < numParticles; i++)
force->addParticle(i, params);
system.addForce(force);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
VerletIntegrator integrator1(0.01);
Context context1(system, integrator1, platform);
context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy);
VerletIntegrator integrator2(0.01);
string deviceIndex = platform.getPropertyValue(context1, HipPlatform::HipDeviceIndex());
map<string, string> props;
props[HipPlatform::HipDeviceIndex()] = deviceIndex+","+deviceIndex;
Context context2(system, integrator2, platform, props);
context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
}
void runPlatformTests() {
testParallelComputation();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment