Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3b8df952
Unverified
Commit
3b8df952
authored
Sep 05, 2024
by
Peter Eastman
Committed by
GitHub
Sep 05, 2024
Browse files
Merge pull request #4632 from ex-rzr/make-hip-standard-platform
HIP platform
parents
5ce6a85d
28fb2918
Changes
128
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2599 additions
and
0 deletions
+2599
-0
platforms/hip/src/kernels/common.hip
platforms/hip/src/kernels/common.hip
+54
-0
platforms/hip/src/kernels/findInteractingBlocks.hip
platforms/hip/src/kernels/findInteractingBlocks.hip
+636
-0
platforms/hip/src/kernels/intrinsics.hip
platforms/hip/src/kernels/intrinsics.hip
+46
-0
platforms/hip/src/kernels/nonbonded.hip
platforms/hip/src/kernels/nonbonded.hip
+511
-0
platforms/hip/src/kernels/parallel.hip
platforms/hip/src/kernels/parallel.hip
+13
-0
platforms/hip/src/kernels/sort.hip
platforms/hip/src/kernels/sort.hip
+361
-0
platforms/hip/src/kernels/utilities.hip
platforms/hip/src/kernels/utilities.hip
+102
-0
platforms/hip/src/kernels/vectorOps.hip
platforms/hip/src/kernels/vectorOps.hip
+269
-0
platforms/hip/tests/CMakeLists.txt
platforms/hip/tests/CMakeLists.txt
+33
-0
platforms/hip/tests/HipTests.h
platforms/hip/tests/HipTests.h
+46
-0
platforms/hip/tests/TestHipATMForce.cpp
platforms/hip/tests/TestHipATMForce.cpp
+36
-0
platforms/hip/tests/TestHipAndersenThermostat.cpp
platforms/hip/tests/TestHipAndersenThermostat.cpp
+37
-0
platforms/hip/tests/TestHipBrownianIntegrator.cpp
platforms/hip/tests/TestHipBrownianIntegrator.cpp
+37
-0
platforms/hip/tests/TestHipCMAPTorsionForce.cpp
platforms/hip/tests/TestHipCMAPTorsionForce.cpp
+37
-0
platforms/hip/tests/TestHipCMMotionRemover.cpp
platforms/hip/tests/TestHipCMMotionRemover.cpp
+37
-0
platforms/hip/tests/TestHipCheckpoints.cpp
platforms/hip/tests/TestHipCheckpoints.cpp
+137
-0
platforms/hip/tests/TestHipCompoundIntegrator.cpp
platforms/hip/tests/TestHipCompoundIntegrator.cpp
+37
-0
platforms/hip/tests/TestHipCustomAngleForce.cpp
platforms/hip/tests/TestHipCustomAngleForce.cpp
+67
-0
platforms/hip/tests/TestHipCustomBondForce.cpp
platforms/hip/tests/TestHipCustomBondForce.cpp
+67
-0
platforms/hip/tests/TestHipCustomCPPForce.cpp
platforms/hip/tests/TestHipCustomCPPForce.cpp
+36
-0
No files found.
platforms/hip/src/kernels/common.hip
0 → 100644
View file @
3b8df952
/**
* This file contains HIP definitions for the macros and functions needed for the
* common compute framework.
*/
#define KERNEL extern "C" __global__
#define DEVICE __device__
#define LOCAL __shared__
#define LOCAL_ARG
#define GLOBAL
#define RESTRICT __restrict__
#define LOCAL_ID threadIdx.x
#define LOCAL_SIZE blockDim.x
#define GLOBAL_ID (blockIdx.x*blockDim.x+threadIdx.x)
#define GLOBAL_SIZE (blockDim.x*gridDim.x)
#define GROUP_ID blockIdx.x
#define NUM_GROUPS gridDim.x
#define SYNC_THREADS __syncthreads();
#define SYNC_WARPS {__builtin_amdgcn_wave_barrier(); __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "wavefront");}
#define MEM_FENCE __threadfence_block();
#define ATOMIC_ADD(dest, value) atomicAdd(dest, value)
typedef long long mm_long;
typedef unsigned long long mm_ulong;
#define SUPPORTS_DOUBLE_PRECISION 1
#define LAUNCH_BOUNDS_EXACT(WORK_GROUP_SIZE, WAVES_PER_EU) \
__attribute__((amdgpu_flat_work_group_size(WORK_GROUP_SIZE, WORK_GROUP_SIZE), amdgpu_waves_per_eu(WAVES_PER_EU, WAVES_PER_EU)))
#ifdef USE_DOUBLE_PRECISION
__device__ inline long long realToFixedPoint(double x) {
return static_cast<long long>(x * 0x100000000);
}
#else
__device__ inline long long realToFixedPoint(float x) {
// Faster way to calculate static_cast<long long>(x * 0x100000000) with exactly the same
// results but less instructions.
float integral = truncf(x);
float fractional = (x - integral) * 0x100000000;
unsigned int integral_u32 = static_cast<int>(integral);
unsigned int fractional_u32 = static_cast<unsigned int>(fabsf(fractional));
// A negative real number (with non-zero fractional) needs rounding-down x for integral and
// changing fractional's sign. However, -1 is used as a threshold instead of 0 because, when
// fractional is in (-1; 0], fractional_u32 is 0 and the number is considered an integer.
bool isNegReal = fractional <= -1.0f;
return (static_cast<unsigned long long>(isNegReal ? integral_u32 - 1 : integral_u32) << 32) |
static_cast<unsigned long long>(isNegReal ? 0 - fractional_u32 : fractional_u32);
}
#endif
platforms/hip/src/kernels/findInteractingBlocks.hip
0 → 100644
View file @
3b8df952
#define BUFFER_SIZE 256
#if defined(AMD_RDNA)
typedef unsigned int warpflags;
__device__ inline int warpPopc(warpflags x) {
return __popc(x);
}
#else
typedef unsigned long long warpflags;
__device__ inline int warpPopc(warpflags x) {
return __popcll(x);
}
#endif
struct alignas(sizeof(__half) * 4) BoundingBox {
__device__ BoundingBox(real3 f) {
// Round up so we'll err on the side of making the box a little too large.
// This ensures interactions will never be missed.
v[0] = __float2half_ru((float) f.x);
v[1] = __float2half_ru((float) f.y);
v[2] = __float2half_ru((float) f.z);
}
__device__ real3 toReal3() const {
return make_real3(__half2float(v[0]), __half2float(v[1]), __half2float(v[2]));
}
private:
__half v[3];
};
/**
* Find a bounding box for the atoms in each block.
*/
extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
const real4* __restrict__ posq, real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ rebuildNeighborList,
real2* __restrict__ blockSizeRange) {
const int indexInTile = threadIdx.x%TILE_SIZE;
const int index = warpSize == TILE_SIZE ? blockIdx.x : (blockIdx.x*(warpSize/TILE_SIZE) + threadIdx.x/TILE_SIZE);
const int base = index * TILE_SIZE;
if (base >= numAtoms)
return;
real4 tPos = posq[base + indexInTile < numAtoms ? base + indexInTile : 0];
#ifdef USE_PERIODIC
real4 pos;
pos.x = SHFL(tPos.x, 0);
pos.y = SHFL(tPos.y, 0);
pos.z = SHFL(tPos.z, 0);
APPLY_PERIODIC_TO_POS(pos)
real4 minPos = pos;
real4 maxPos = pos;
for (int i = 1; i < TILE_SIZE; i++) {
pos.x = SHFL(tPos.x, i);
pos.y = SHFL(tPos.y, i);
pos.z = SHFL(tPos.z, i);
real4 center = 0.5f*(maxPos+minPos);
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
}
#else
real4 minPos = tPos;
real4 maxPos = tPos;
for (int i = TILE_SIZE >> 1; i > 0; i >>= 1) {
real4 tpos1, tpos2;
tpos1.x = __shfl_down(minPos.x, i, TILE_SIZE);
tpos1.y = __shfl_down(minPos.y, i, TILE_SIZE);
tpos1.z = __shfl_down(minPos.z, i, TILE_SIZE);
tpos2.x = __shfl_down(maxPos.x, i, TILE_SIZE);
tpos2.y = __shfl_down(maxPos.y, i, TILE_SIZE);
tpos2.z = __shfl_down(maxPos.z, i, TILE_SIZE);
minPos.x = min(minPos.x, tpos1.x);
minPos.y = min(minPos.y, tpos1.y);
minPos.z = min(minPos.z, tpos1.z);
maxPos.x = max(maxPos.x, tpos2.x);
maxPos.y = max(maxPos.y, tpos2.y);
maxPos.z = max(maxPos.z, tpos2.z);
}
minPos.x = SHFL(minPos.x, 0);
minPos.y = SHFL(minPos.y, 0);
minPos.z = SHFL(minPos.z, 0);
maxPos.x = SHFL(maxPos.x, 0);
maxPos.y = SHFL(maxPos.y, 0);
maxPos.z = SHFL(maxPos.z, 0);
#endif
real4 blockSize = 0.5f*(maxPos-minPos);
real4 center = 0.5f*(maxPos+minPos);
center.w = 0;
real4 delta = tPos - center;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real tdelta = delta.x*delta.x+delta.y*delta.y+delta.z*delta.z;
real tcenter = max(center.w, tdelta);
for (int i = TILE_SIZE >> 1; i > 0; i >>= 1) {
real t = __shfl_down(tcenter, i, TILE_SIZE);
tcenter = max(tcenter, t);
}
if (indexInTile == 0) {
center.w = SQRT(tcenter);
blockBoundingBox[index] = blockSize;
blockCenter[index] = center;
// Record the range of sizes.
real totalSize = blockSize.x+blockSize.y+blockSize.z;
atomicMin(&blockSizeRange->x, totalSize);
atomicMax(&blockSizeRange->y, totalSize);
}
if (blockIdx.x == 0 && threadIdx.x == 0)
rebuildNeighborList[0] = 0;
}
extern "C" __global__ void computeSortKeys(const real4* __restrict__ blockBoundingBox, unsigned int* __restrict__ sortedBlocks, const real2* __restrict__ blockSizeRange/*, int numSizes*/) {
// Sort keys store the bin in the high order part and the block in the low
// order part.
real2 sizeRange = make_real2(LOG(blockSizeRange->x), LOG(blockSizeRange->y));
int numSizeBins = 20;
real scale = numSizeBins/(sizeRange.y-sizeRange.x);
unsigned int i = threadIdx.x+blockIdx.x*blockDim.x;
if (i < NUM_BLOCKS) {
real4 box = blockBoundingBox[i];
real size = LOG(box.x+box.y+box.z);
int bin = (size-sizeRange.x)*scale;
bin = max(0, min(bin, numSizeBins-1));
sortedBlocks[i] = (((unsigned int) bin)<<BIN_SHIFT) + i;
}
}
/**
* Sort the data about bounding boxes so it can be accessed more efficiently in the next kernel.
*/
extern "C" __global__ void sortBoxData(const unsigned int* __restrict__ sortedBlocks, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockBoundingBox, real4* __restrict__ sortedBlockCenter, BoundingBox* __restrict__ sortedBlockBoundingBox,
#ifdef USE_LARGE_BLOCKS
real4* __restrict__ largeBlockCenter, BoundingBox* __restrict__ largeBlockBoundingBox, real4 periodicBoxSize,
real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
#endif
const real4* __restrict__ posq, const real4* __restrict__ oldPositions,
unsigned int* __restrict__ interactionCount, int* __restrict__ rebuildNeighborList, bool forceRebuild,
real2* __restrict__ blockSizeRange) {
int i = threadIdx.x+blockIdx.x*blockDim.x;
if (i < NUM_BLOCKS) {
unsigned int index = sortedBlocks[i] & BLOCK_INDEX_MASK;
sortedBlockCenter[i] = blockCenter[index];
sortedBlockBoundingBox[i] = BoundingBox(trimTo3(blockBoundingBox[index]));
#ifdef USE_LARGE_BLOCKS
// Compute the sizes of large blocks (composed of warpSize regular blocks) starting from each block.
real4 minPos = blockCenter[index]-blockBoundingBox[index];
real4 maxPos = blockCenter[index]+blockBoundingBox[index];
int last = min(i+warpSize, NUM_BLOCKS);
for (int j = i+1; j < last; j++) {
unsigned int index2 = sortedBlocks[j] & BLOCK_INDEX_MASK;
real4 blockPos = blockCenter[index2];
real4 width = blockBoundingBox[index2];
#ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos);
APPLY_PERIODIC_TO_POS_WITH_CENTER(blockPos, center)
#endif
minPos = make_real4(min(minPos.x, blockPos.x-width.x), min(minPos.y, blockPos.y-width.y), min(minPos.z, blockPos.z-width.z), 0);
maxPos = make_real4(max(maxPos.x, blockPos.x+width.x), max(maxPos.y, blockPos.y+width.y), max(maxPos.z, blockPos.z+width.z), 0);
}
largeBlockCenter[i] = 0.5f*(maxPos+minPos);
largeBlockBoundingBox[i] = BoundingBox(trimTo3(0.5f*(maxPos-minPos)));
#endif
}
// Initialize min and max for the atomic reduction in findBlockBounds (for the next step)
if (i == 0) {
blockSizeRange->x = 1e38;
blockSizeRange->y = 0;
}
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool rebuild = forceRebuild;
if (i < NUM_ATOMS) {
real4 delta = oldPositions[i]-posq[i];
if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING)
rebuild = true;
}
if (rebuild) {
rebuildNeighborList[0] = 1;
interactionCount[0] = 0;
interactionCount[1] = 0;
}
}
#if MAX_BITS_FOR_PAIRS > 0
__device__ inline
void collectInteractions(unsigned int& interacts, float d, int bit) {
interacts |= (__float_as_uint(d) >> 31) << bit;
}
__device__ inline
void collectInteractions(unsigned int& interacts, double d, int bit) {
interacts |= ((unsigned int)__double2hiint(d) >> 31) << bit;
}
#else
// Simplified version that does not collect individual bits (they are not needed without single pairs),
// only sets the flag that there are any interactions.
__device__ inline
void collectInteractions(unsigned int& interacts, float d, int) {
interacts |= __float_as_uint(d) & (1 << 31);
}
__device__ inline
void collectInteractions(unsigned int& interacts, double d, int) {
interacts |= (unsigned int)__double2hiint(d) & (1 << 31);
}
#endif
#if !defined(USE_DOUBLE_PRECISION) && __has_builtin(__builtin_amdgcn_mfma_f32_4x4x1f32)
#define USE_MFMA
using vfloat = __attribute__((__vector_size__(4 * sizeof(float)))) float;
template<int BlockId>
inline __device__
void mfma4x4(const float4& pos1, const float4& pos2, const vfloat& c, unsigned int& interacts) {
vfloat d;
d = __builtin_amdgcn_mfma_f32_4x4x1f32(pos1.x, -pos2.x, c, 4, BlockId, 0);
d = __builtin_amdgcn_mfma_f32_4x4x1f32(pos1.y, -pos2.y, d, 4, BlockId, 0);
d = __builtin_amdgcn_mfma_f32_4x4x1f32(pos1.z, -pos2.z, d, 4, BlockId, 0);
d = __builtin_amdgcn_mfma_f32_4x4x1f32(pos1.w, 1.0f, d, 4, BlockId, 0);
#pragma unroll
for (int i = 0; i < 4; i++) {
collectInteractions(interacts, d[i], BlockId * 4 + i);
}
}
#endif
/**
* Compare the bounding boxes for each pair of atom blocks (comprised of TILE_SIZE atoms each), forming a tile. If the two
* atom blocks are sufficiently far apart, mark them as non-interacting. There are two stages in the algorithm.
*
* STAGE 1:
*
* A coarse grained atom block against interacting atom block neighbour list is constructed.
*
* Each warp first loads in some block X of interest. Each thread within the warp then loads
* in a different atom block Y. If Y has exclusions with X, then Y is not processed. If the bounding boxes
* of the two atom blocks are within the cutoff distance, then the two atom blocks are considered to be
* interacting and Y is added to the buffer for X.
*
* STAGE 2:
*
* A fine grained atom block against interacting atoms neighbour list is constructed.
*
* The warp loops over atom blocks Y that were found to (possibly) interact with atom block X. Each thread
* in the warp loops over the TILE_SIZE atoms in X and compares their positions to one particular atom from block Y.
* If it finds one closer than the cutoff distance, the atom is added to the list of atoms interacting with block X.
* This continues until the buffer fills up, at which point the results are written to global memory.
*
* [in] periodicBoxSize - size of the rectangular periodic box
* [in] invPeriodicBoxSize - inverse of the periodic box
* [in] blockCenter - the center of each bounding box
* [in] blockBoundingBox - bounding box of each atom block
* [out] interactionCount - total number of tiles that have interactions
* [out] interactingTiles - set of blocks that have interactions
* [out] interactingAtoms - a list of atoms that interact with each atom block
* [in] posq - x,y,z coordinates of each atom and charge q
* [in] maxTiles - maximum number of tiles to process, used for multi-GPUs
* [in] startBlockIndex - first block to process, used for multi-GPUs,
* [in] numBlocks - total number of atom blocks
* [in] sortedBlocks - a sorted list of atom blocks based on volume
* [in] sortedBlockCenter - sorted centers, duplicated for fast access to avoid indexing
* [in] sortedBlockBoundingBox - sorted bounding boxes, duplicated for fast access
* [in] exclusionIndices - maps into exclusionRowIndices with the starting position for a given atom
* [in] exclusionRowIndices - stores the a continuous list of exclusions
* eg: block 0 is excluded from atom 3,5,6
* block 1 is excluded from atom 3,4
* block 2 is excluded from atom 1,3,5,6
* exclusionIndices[0][3][5][8]
* exclusionRowIndices[3][5][6][3][4][1][3][5][6]
* index 0 1 2 3 4 5 6 7 8
* [out] oldPos - stores the positions of the atoms in which this neighbourlist was built on
* - this is used to decide when to rebuild a neighbourlist
* [in] rebuildNeighbourList - whether or not to execute this kernel
*
*/
extern "C" __global__ __launch_bounds__(GROUP_SIZE) void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
unsigned int* __restrict__ interactionCount, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms,
int2* __restrict__ singlePairs, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int maxSinglePairs,
unsigned int startBlockIndex, unsigned int numBlocks, const unsigned int* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter,
const BoundingBox* __restrict__ sortedBlockBoundingBox,
#ifdef USE_LARGE_BLOCKS
const real4* __restrict__ largeBlockCenter, const BoundingBox* __restrict__ largeBlockBoundingBox,
#endif
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
real4* __restrict__ oldPositions, const int* __restrict__ rebuildNeighborList) {
if (rebuildNeighborList[0] == 0)
return; // The neighbor list doesn't need to be rebuilt.
constexpr int tilesPerWarp = warpSize/TILE_SIZE;
constexpr int warpsPerBlock = GROUP_SIZE/warpSize;
const int indexInWarp = threadIdx.x%warpSize;
const int indexInTile = threadIdx.x%TILE_SIZE;
const int tileInWarp = tilesPerWarp == 1 ? 0 : (threadIdx.x/TILE_SIZE)%tilesPerWarp;
const int warpInBlock = warpsPerBlock == 1 ? 0 : threadIdx.x/warpSize;
const int warpIndex = blockIdx.x*warpsPerBlock + (warpsPerBlock == 1 ? 0 : warpInBlock);
const warpflags warpMask = (static_cast<warpflags>(1)<<indexInWarp)-1;
__shared__ int workgroupBuffer[BUFFER_SIZE*warpsPerBlock];
__shared__ real4 workgroupPosBuffer[TILE_SIZE*warpsPerBlock];
__shared__ int workgroupExclusions[MAX_EXCLUSIONS*warpsPerBlock];
__shared__ int workgroupBlock2Buffer[(warpSize+1)*warpsPerBlock];
int* buffer = workgroupBuffer+BUFFER_SIZE*warpInBlock;
real4* posBuffer = workgroupPosBuffer+TILE_SIZE*warpInBlock;
int* exclusionsForX = workgroupExclusions+MAX_EXCLUSIONS*warpInBlock;
int* block2Buffer = workgroupBlock2Buffer+(warpSize+1)*warpInBlock;
// Loop over blocks.
int block1 = startBlockIndex+warpIndex/NUM_TILES_IN_BATCH;
if (block1 < startBlockIndex+numBlocks) {
// Load data for this block. Note that all threads in a warp are processing the same block.
int x = sortedBlocks[block1] & BLOCK_INDEX_MASK;
real4 blockCenterX = sortedBlockCenter[block1];
real3 blockSizeX = sortedBlockBoundingBox[block1].toReal3();
int neighborsInBuffer = 0;
real4 pos1 = posq[x*TILE_SIZE+indexInTile];
#ifdef USE_PERIODIC
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= PADDED_CUTOFF);
if (singlePeriodicCopy) {
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
}
#endif
pos1.w = 0.5f * (pos1.x * pos1.x + pos1.y * pos1.y + pos1.z * pos1.z);
if (tileInWarp == 0) {
posBuffer[indexInTile] = pos1;
}
// Load exclusion data for block x.
const int exclusionStart = exclusionRowIndices[x];
const int exclusionEnd = exclusionRowIndices[x+1];
const int numExclusions = exclusionEnd-exclusionStart;
#pragma unroll 1
for (int j = indexInWarp; j < numExclusions; j += warpSize)
exclusionsForX[j] = exclusionIndices[exclusionStart+j];
// Loop over atom blocks to search for neighbors. The threads in a warp compare block1 against warpSize
// other blocks in parallel.
// For small systems multiple warps (NUM_TILES_IN_BATCH = 4, 2...) process one block1 reducing the overall
// duration of the kernel because first blocks block1 have to process more block2 blocks so most of compute
// units are idle at the end of the kernel (the kernel works on the upper triangle of
// the NUM_BLOCKS x NUM_BLOCKS matrix).
#ifdef USE_LARGE_BLOCKS
warpflags largeBlockFlags = 0;
int loadedLargeBlocks = 0;
#endif
int block2Count = 0;
// Load blocks from addresses aligned by warpSize for faster loading from sortedBlockCenter and sortedBlockBoundingBox.
for (int block2Base = ((block1+1)/warpSize + warpIndex%NUM_TILES_IN_BATCH)*warpSize; block2Base < NUM_BLOCKS; block2Base += warpSize*NUM_TILES_IN_BATCH) {
// Last iteration cannot be skipped (on CDNA where tilesPerWarp == 2)
const bool lastIteration = block2Base + warpSize*NUM_TILES_IN_BATCH >= NUM_BLOCKS;
#ifdef USE_LARGE_BLOCKS
if (loadedLargeBlocks == 0) {
// Check the next set of large blocks.
int largeBlockIndex = block2Base + warpSize*NUM_TILES_IN_BATCH*indexInWarp;
bool includeLargeBlock = false;
if (largeBlockIndex < NUM_BLOCKS) {
real4 largeCenter = largeBlockCenter[largeBlockIndex];
real3 largeSize = largeBlockBoundingBox[largeBlockIndex].toReal3();
real4 blockDelta = blockCenterX-largeCenter;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-largeSize.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-largeSize.y);
blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-largeSize.z);
includeLargeBlock = (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED);
#ifdef TRICLINIC
// The calculation to find the nearest periodic copy is only guaranteed to work if the nearest copy is less than half a box width away.
// If there's any possibility we might have missed it, do a detailed check.
if (periodicBoxSize.z/2-blockSizeX.z-largeSize.z < PADDED_CUTOFF || periodicBoxSize.y/2-blockSizeX.y-largeSize.y < PADDED_CUTOFF)
includeLargeBlock = true;
#endif
}
largeBlockFlags = __ballot(includeLargeBlock);
loadedLargeBlocks = warpSize;
}
loadedLargeBlocks--;
if ((largeBlockFlags&1) == 0 && !lastIteration) {
// None of the next warpSize blocks interact with block 1.
largeBlockFlags >>= 1;
continue;
}
largeBlockFlags >>= 1;
#endif
int block2 = block2Base+indexInWarp;
bool includeBlock2 = (block1 < block2 && block2 < NUM_BLOCKS);
block2 = includeBlock2 ? block2 : block1;
bool forceInclude = false;
real4 blockCenterY = sortedBlockCenter[block2];
real4 blockDelta = blockCenterX-blockCenterY;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < (PADDED_CUTOFF+blockCenterX.w+blockCenterY.w)*(PADDED_CUTOFF+blockCenterX.w+blockCenterY.w));
#ifndef TRICLINIC
if (!lastIteration && __ballot(includeBlock2) == 0)
continue;
#endif
real3 blockSizeY = sortedBlockBoundingBox[block2].toReal3();
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED);
#ifdef TRICLINIC
// The calculation to find the nearest periodic copy is only guaranteed to work if the nearest copy is less than half a box width away.
// If there's any possibility we might have missed it, do a detailed check.
if (periodicBoxSize.z/2-blockSizeX.z-blockSizeY.z < PADDED_CUTOFF || periodicBoxSize.y/2-blockSizeX.y-blockSizeY.y < PADDED_CUTOFF)
includeBlock2 = forceInclude = true;
#endif
// Collect any blocks we identified as potentially containing neighbors.
warpflags includeBlockFlags = __ballot(includeBlock2);
if (includeBlock2) {
int index = block2Count + warpPopc(includeBlockFlags&warpMask);
block2Buffer[index] = (block2 << 1) | (forceInclude ? 1 : 0);
}
block2Count += warpPopc(includeBlockFlags);
// Loop over the collected candidates (each warp processes 2 blocks on CDNA or 1 block on RDNA).
// Process even number of blocks on CDNA so both half-warps have work to do (except for
// the last iteration of the for-block2Base loop when the left-over must be processed).
const int block2ToProcess = lastIteration ? block2Count : block2Count/tilesPerWarp*tilesPerWarp;
for (int block2Index = 0; block2Index < block2ToProcess; block2Index += tilesPerWarp) {
bool includeBlock2 = block2Index + tileInWarp < block2Count;
const int b = block2Buffer[min(block2Index + tileInWarp, block2Count - 1)];
const bool forceInclude = b & 1;
const int block2 = b >> 1;
int y = sortedBlocks[block2] & BLOCK_INDEX_MASK;
#pragma unroll 1
for (int k = indexInTile; k < numExclusions; k += TILE_SIZE)
includeBlock2 &= (exclusionsForX[k] != y);
includeBlock2 = BALLOT(!includeBlock2) == 0;
// Check each atom in block Y for interactions.
int atom2 = y*TILE_SIZE+indexInTile;
real4 pos2 = posq[atom2];
#ifdef USE_PERIODIC
if (singlePeriodicCopy) {
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos2, blockCenterX)
}
#endif
pos2.w = 0.5f * (pos2.x * pos2.x + pos2.y * pos2.y + pos2.z * pos2.z);
real4 blockCenterY = sortedBlockCenter[block2];
real3 atomDelta = trimTo3(pos1)-trimTo3(blockCenterY);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(atomDelta)
#endif
tileflags atomFlags = BALLOT(forceInclude || atomDelta.x*atomDelta.x+atomDelta.y*atomDelta.y+atomDelta.z*atomDelta.z < (PADDED_CUTOFF+blockCenterY.w)*(PADDED_CUTOFF+blockCenterY.w));
tileflags interacts = 0;
// The condition `posj.w + pos2.w - posj.x*pos2.x - posj.y*pos2.y - posj.z*pos2.z < 0.5f * PADDED_CUTOFF_SQUARED` is expressed as
// `posj.x*pos2.x - posj.y*pos2.y - posj.z*pos2.z - posj.w - 0.5f * PADDED_CUTOFF_SQUARED - pos2.w` and computed using fma
// (it saves 1 instruction).
// Sign bit is used directly instead of `halfDist2 < 0.5f * PADDED_CUTOFF_SQUARED ? 1<<j : 0`.
#ifdef USE_PERIODIC
if (!singlePeriodicCopy) {
while (atomFlags) {
int j = __ffs(atomFlags)-1;
atomFlags = atomFlags ^ (static_cast<tileflags>(1) << j);
real3 delta = trimTo3(pos2)-trimTo3(posBuffer[j]);
APPLY_PERIODIC_TO_DELTA(delta)
real d = delta.x*delta.x+delta.y*delta.y+delta.z*delta.z - PADDED_CUTOFF_SQUARED;
collectInteractions(interacts, d, j);
}
}
else {
#endif
const real lim = 0.5f * PADDED_CUTOFF_SQUARED - pos2.w;
#if defined(USE_MFMA)
const vfloat c = { -lim, -lim, -lim, -lim };
mfma4x4<0>(pos1, pos2, c, interacts);
mfma4x4<1>(pos1, pos2, c, interacts);
mfma4x4<2>(pos1, pos2, c, interacts);
mfma4x4<3>(pos1, pos2, c, interacts);
mfma4x4<4>(pos1, pos2, c, interacts);
mfma4x4<5>(pos1, pos2, c, interacts);
mfma4x4<6>(pos1, pos2, c, interacts);
mfma4x4<7>(pos1, pos2, c, interacts);
#else
while (atomFlags) {
int j = __ffs(atomFlags)-1;
atomFlags = atomFlags ^ (static_cast<tileflags>(1) << j);
real4 posj = posBuffer[j];
real d = fma(-posj.x, pos2.x, fma(-posj.y, pos2.y, fma(-posj.z, pos2.z, posj.w - lim)));
collectInteractions(interacts, d, j);
}
#endif
#ifdef USE_PERIODIC
}
#endif
if (atom2 >= NUM_ATOMS || !includeBlock2) {
interacts = 0;
}
#if MAX_BITS_FOR_PAIRS > 0
const unsigned int interactCount = __popc(interacts);
// Record interactions that should be computed as single pairs rather than in blocks.
const bool storeAsSinglePair = interactCount > 0 && interactCount <= MAX_BITS_FOR_PAIRS;
if (__ballot(storeAsSinglePair)) {
unsigned int sum = 0;
unsigned int prevSum = 0;
for (int i = 1; i <= MAX_BITS_FOR_PAIRS; i++) {
warpflags b = __ballot(interactCount == i);
sum += warpPopc(b) * i;
prevSum += warpPopc(b&warpMask) * i;
}
unsigned int pairStartIndex = 0;
if (indexInWarp == 0)
pairStartIndex = atomicAdd(&interactionCount[1], sum);
pairStartIndex = __shfl(pairStartIndex, 0);
unsigned int pairIndex = pairStartIndex + prevSum;
if (storeAsSinglePair && pairIndex+interactCount <= maxSinglePairs) {
while (interacts != 0) {
int j = __ffs(interacts)-1;
singlePairs[pairIndex] = make_int2(atom2, x*TILE_SIZE+j);
interacts = interacts ^ (static_cast<tileflags>(1) << j);
pairIndex++;
}
}
}
#else
const unsigned int interactCount = interacts;
#endif
// Add any interacting atoms to the buffer.
warpflags includeAtomFlags = __ballot(interactCount > MAX_BITS_FOR_PAIRS);
if (interactCount > MAX_BITS_FOR_PAIRS) {
int index = neighborsInBuffer+warpPopc(includeAtomFlags&warpMask);
buffer[index] = atom2;
}
neighborsInBuffer += warpPopc(includeAtomFlags);
if (neighborsInBuffer > BUFFER_SIZE-warpSize) {
// Store the new tiles to memory.
unsigned int tilesToStore = neighborsInBuffer/warpSize*tilesPerWarp;
unsigned int tileStartIndex = 0;
if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
unsigned int newTileStartIndex = __shfl(tileStartIndex, 0);
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
for (int j = 0; j < tilesToStore/tilesPerWarp; j++)
interactingAtoms[newTileStartIndex*TILE_SIZE+j*warpSize+indexInWarp] = buffer[j*warpSize+indexInWarp];
}
if (indexInWarp+TILE_SIZE*tilesToStore < BUFFER_SIZE)
buffer[indexInWarp] = buffer[indexInWarp+TILE_SIZE*tilesToStore];
neighborsInBuffer -= TILE_SIZE*tilesToStore;
}
}
// Move not processed blocks to the head of block2Buffer.
if (indexInWarp < block2Count - block2ToProcess)
block2Buffer[indexInWarp] = block2Buffer[block2ToProcess + indexInWarp];
block2Count = block2Count - block2ToProcess;
}
// If we have a partially filled buffer, store it to memory.
if (neighborsInBuffer > 0) {
unsigned int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
unsigned int tileStartIndex = 0;
if (indexInWarp == 0)
tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
unsigned int newTileStartIndex = __shfl(tileStartIndex, 0);
if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore)
interactingTiles[newTileStartIndex+indexInWarp] = x;
for (int j = 0; j <= tilesToStore/tilesPerWarp; j++) {
if (j*warpSize+indexInWarp < tilesToStore*TILE_SIZE)
interactingAtoms[newTileStartIndex*TILE_SIZE+j*warpSize+indexInWarp] = (j*warpSize+indexInWarp < neighborsInBuffer ? buffer[j*warpSize+indexInWarp] : PADDED_NUM_ATOMS);
}
}
}
}
// Record the positions the neighbor list is based on.
int i = threadIdx.x+blockIdx.x*GROUP_SIZE;
if (i < NUM_ATOMS) {
oldPositions[i] = posq[i];
}
}
extern "C" __global__ void copyInteractionCounts(const unsigned int* __restrict__ interactionCount,
unsigned int* __restrict__ pinnedInteractionCount) {
pinnedInteractionCount[0] = interactionCount[0];
pinnedInteractionCount[1] = interactionCount[1];
}
platforms/hip/src/kernels/intrinsics.hip
0 → 100644
View file @
3b8df952
/**
* This file contains the device function for using cross-lane operations (ballot and shuffle)
*/
#if defined(TILE_SIZE)
#if !defined(AMD_RDNA)
// Two subwarps per warp
#define SHFL(var, srcLane) __shfl(var, (srcLane) & (TILE_SIZE - 1), TILE_SIZE)
#define BALLOT(var) (unsigned int)(__ballot(var) >> (threadIdx.x & ((64 - 1) ^ (TILE_SIZE - 1))))
#else
#define SHFL(var, srcLane) __shfl(var, srcLane)
#define BALLOT(var) __ballot(var)
#endif
#endif
template<class T>
static __inline__ __device__
T warpShuffle(const T& input, const int src_lane) {
static_assert(sizeof(T) % sizeof(int) == 0, "incorrect type size");
constexpr int words_no = sizeof(T) / sizeof(int);
T output;
#pragma unroll
for(int i = 0; i < words_no; i++) {
int word;
__builtin_memcpy(&word, reinterpret_cast<const char*>(&input) + i * sizeof(int), sizeof(int));
word = __builtin_amdgcn_ds_bpermute(src_lane << 2, word);
__builtin_memcpy(reinterpret_cast<char*>(&output) + i * sizeof(int), &word, sizeof(int));
}
return output;
}
template<int Subwarp, class T>
static __inline__ __device__
typename std::enable_if<(Subwarp == warpSize), T>::type
warpRotateLeft(const T& input) {
return warpShuffle(input, threadIdx.x + 1);
}
template<int Subwarp, class T>
static __inline__ __device__
typename std::enable_if<!(Subwarp == warpSize), T>::type
warpRotateLeft(const T& input) {
return warpShuffle(input, ((threadIdx.x + 1) & (Subwarp - 1)) | (threadIdx.x & ~(Subwarp - 1)));
}
platforms/hip/src/kernels/nonbonded.hip
0 → 100644
View file @
3b8df952
/**
* Compute nonbonded interactions. The kernel is separated into two parts,
* tiles with exclusions and tiles without exclusions. It relies heavily on
* implicit warp-level synchronization. A tile is defined by two atom blocks
* each of warpsize. Each warp computes a range of tiles.
*
* Tiles with exclusions compute the entire set of interactions across
* atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
* the forces are computed and accumulated diagonally in the manner shown below
* where, suppose
*
* [a-h] comprise atom block 1, [i-p] comprise atom block 2
*
* 1 denotes the first set of calculations within the warp
* 2 denotes the second set of calculations within the warp
* ... etc.
*
* threads
* 0 1 2 3 4 5 6 7
* atom1
* L a b c d e f g h
* o i 1 2 3 4 5 6 7 8
* c j 8 1 2 3 4 5 6 7
* a k 7 8 1 2 3 4 5 6
* l l 6 7 8 1 2 3 4 5
* D m 5 6 7 8 1 2 3 4
* a n 4 5 6 7 8 1 2 3
* t o 3 4 5 6 7 8 1 2
* a p 2 3 4 5 6 7 8 1
*
* Tiles without exclusions read off directly from the neighbourlist interactingAtoms
* and follows the same force accumulation method. If more there are more interactingTiles
* than the size of the neighbourlist initially allocated, the neighbourlist is rebuilt
* and the full tileset is computed. This should happen on the first step, and very rarely
* afterwards.
*
* On diagonal exclusion tiles use __shfl to broadcast. For all other types of tiles __shfl
* is used to pass around the forces, positions, and parameters when computing the forces.
*
* [out]forceBuffers - forces on each atom to eventually be accumulated
* [out]energyBuffer - energyBuffer to eventually be accumulated
* [in]posq - x,y,z,charge
* [in]exclusions - 1024-bit flags denoting atom-atom exclusions for each tile
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing
* [in]int tiles - the atom block for each tile
* [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded
* [in]periodicBoxSize - size of the Periodic Box, last dimension (w) not used
* [in]invPeriodicBox - inverse of the periodicBoxSize, pre-computed for speed
* [in]blockCenter - the center of each block in euclidean coordinates
* [in]blockSize - size of the each block, radiating from the center
* - x is half the distance of total length
* - y is half the distance of total width
* - z is half the distance of total height
* - w is not used
* [in]interactingAtoms - a list of interactions within a given tile
*
*/
extern "C" __launch_bounds__(THREAD_BLOCK_SIZE) __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned long long numTileIndices
#ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxSinglePairs,
const int2* __restrict__ singlePairs
#endif
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = blockIdx.x*(THREAD_BLOCK_SIZE/TILE_SIZE) + threadIdx.x/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
mixed energy = 0;
INIT_DERIVATIVES
// First loop: process tiles that contain exclusions.
for (int pos = FIRST_EXCLUSION_TILE+warp; pos < LAST_EXCLUSION_TILE; pos+=totalWarps) {
const int2 tileIndices = exclusionTiles[pos];
const unsigned int x = tileIndices.x;
const unsigned int y = tileIndices.y;
real3 force = make_real3(0);
unsigned int atom1 = x*TILE_SIZE + tgx;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
tileflags excl = exclusions[pos*TILE_SIZE+tgx];
#endif
if (x == y) {
// This tile is on the diagonal.
real4 shflPosq = posq1;
// we do not need to fetch parameters from global since this is a symmetric tile
// instead we can broadcast the values using shuffle
#pragma unroll 2
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2;
BROADCAST_WARP_DATA
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < MAX_CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
int atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && (excl & 1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 0.5f;
COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
energy += 0.5f*tempEnergy;
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.x -= delta.x*dEdR;
force.y -= delta.y*dEdR;
force.z -= delta.z*dEdR;
#else
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#endif
#endif
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
int atomIndex = j;
real4 shflPosq = posq[j];
real3 shflForce;
shflForce.x = 0.0f;
shflForce.y = 0.0f;
shflForce.z = 0.0f;
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
#ifdef USE_EXCLUSIONS
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
#pragma unroll 2
for (j = 0; j < TILE_SIZE; j++) {
real4 posq2 = shflPosq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < MAX_CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
int atom2 = atomIndex;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && (excl & 1));
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
energy += tempEnergy;
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#endif // end USE_SYMMETRIC
#endif
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
SHUFFLE_WARP_DATA
atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
}
const unsigned int offset = atomIndex;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_NEIGHBOR_LIST
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
for (unsigned int pos0 = warp; pos0 < LAST_EXCLUSION_TILE+numTiles; pos0+=totalWarps) {
// Skip warps that may be still busy in the first loop
if (pos0 < LAST_EXCLUSION_TILE) {
continue;
}
const unsigned int pos = pos0-LAST_EXCLUSION_TILE;
#else
int pos = (int) (startTileIndex+warp*numTileIndices/totalWarps);
int end = (int) (startTileIndex+(warp+1)*numTileIndices/totalWarps);
int skipBase = 0;
int currentSkipIndex = tbx;
int skipTiles;
skipTiles = -1;
for (; pos < end; pos++) {
#endif
real3 force = make_real3(0);
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_NEIGHBOR_LIST
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed.
while (SHFL(skipTiles, TILE_SIZE-1) < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
int2 tile = exclusionTiles[skipBase+tgx];
skipTiles = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles = end;
skipBase += TILE_SIZE;
currentSkipIndex = 0;
}
while (SHFL(skipTiles, currentSkipIndex) < pos)
currentSkipIndex++;
includeTile = (SHFL(skipTiles, currentSkipIndex) != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
// Load atom data for this tile.
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
#ifdef USE_NEIGHBOR_LIST
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
#else
unsigned int j = y*TILE_SIZE + tgx;
#endif
int atomIndex = j;
DECLARE_LOCAL_PARAMETERS
real4 shflPosq;
real3 shflForce;
shflForce.x = 0.0f;
shflForce.y = 0.0f;
shflForce.z = 0.0f;
if (j < PADDED_NUM_ATOMS) {
// Load position of atom j from from global memory
shflPosq = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
else {
shflPosq = make_real4(0, 0, 0, 0);
CLEAR_LOCAL_PARAMETERS
}
#ifdef USE_PERIODIC
if (singlePeriodicCopy) {
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(shflPosq, blockCenterX)
#pragma unroll 2
for (j = 0; j < TILE_SIZE; j++) {
real4 posq2 = shflPosq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < MAX_CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
int atom2 = atomIndex;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
energy += tempEnergy;
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#endif // end USE_SYMMETRIC
#endif
#ifdef USE_CUTOFF
}
#endif
SHUFFLE_WARP_DATA
atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
}
}
else
#endif
{
// We need to apply periodic boundary conditions separately for each interaction.
#pragma unroll 2
for (j = 0; j < TILE_SIZE; j++) {
real4 posq2 = shflPosq;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < MAX_CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
int atom2 = atomIndex;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
#ifdef USE_EXCLUSIONS
bool isExcluded = !(atom1 < NUM_ATOMS && atom2 < NUM_ATOMS);
#endif
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
energy += tempEnergy;
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
shflForce.x += delta.x;
shflForce.y += delta.y;
shflForce.z += delta.z;
#else // !USE_SYMMETRIC
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
shflForce.x += dEdR2.x;
shflForce.y += dEdR2.y;
shflForce.z += dEdR2.z;
#endif // end USE_SYMMETRIC
#endif
#ifdef USE_CUTOFF
}
#endif
SHUFFLE_WARP_DATA
atomIndex = warpRotateLeft<TILE_SIZE>(atomIndex);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
unsigned int atom2 = atomIndex;
if (atom2 < PADDED_NUM_ATOMS) {
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(shflForce.x)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.y)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(shflForce.z)));
}
#endif
}
}
// Third loop: single pairs that aren't part of a tile.
#if USE_NEIGHBOR_LIST
const unsigned int numPairs = interactionCount[1];
if (numPairs > maxSinglePairs)
return; // There wasn't enough memory for the neighbor list.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numPairs; i += blockDim.x*gridDim.x) {
int2 pair = singlePairs[i];
int atom1 = pair.x;
int atom2 = pair.y;
real4 posq1 = posq[atom1];
real4 posq2 = posq[atom2];
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < MAX_CUTOFF_SQUARED) {
LOAD_ATOM1_PARAMETERS
int j = atom2;
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_ATOM2_PARAMETERS
real invR = RSQRT(r2);
real r = r2*invR;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
bool isExcluded = false;
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
#ifdef INCLUDE_ENERGY
energy += tempEnergy;
#endif
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
real3 dEdR1 = delta*dEdR;
real3 dEdR2 = -dEdR1;
#endif
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.x)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.y)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR1.z)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.x)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.y)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(-dEdR2.z)));
#endif
}
}
#endif
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
SAVE_DERIVATIVES
}
platforms/hip/src/kernels/parallel.hip
0 → 100644
View file @
3b8df952
/**
* Sum the forces computed by different contexts.
*/
extern "C" __global__ void sumForces(long long* __restrict__ force, long long* __restrict__ buffer, int bufferSize, int numBuffers) {
int totalSize = bufferSize*numBuffers;
for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
long long sum = force[index];
for (int i = index; i < totalSize; i += bufferSize)
sum += buffer[i];
force[index] = sum;
}
}
platforms/hip/src/kernels/sort.hip
0 → 100644
View file @
3b8df952
__device__ KEY_TYPE getValue(DATA_TYPE value) {
return SORT_KEY;
}
extern "C" {
/**
* Sort a list that is short enough to entirely fit in local memory. This is executed as
* a single thread block.
*/
__global__ void sortShortList(DATA_TYPE* __restrict__ data, unsigned int length) {
// Load the data into local memory.
extern __shared__ DATA_TYPE dataBuffer[];
for (int index = threadIdx.x; index < length; index += blockDim.x)
dataBuffer[index] = data[index];
__syncthreads();
// Perform a bitonic sort in local memory.
unsigned int lenghtNextPow2 = length <= 2 ? length : (1 << (32 - __clz(length - 1)));
for (unsigned int k = 2; k <= lenghtNextPow2; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x) {
int ixj = i^j;
if (ixj > i && ixj < length) {
DATA_TYPE value1 = dataBuffer[i];
DATA_TYPE value2 = dataBuffer[ixj];
bool ascending = (__popc(~i & (lenghtNextPow2 - k)) & 1) == 0;
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
dataBuffer[i] = value2;
dataBuffer[ixj] = value1;
}
}
}
__syncthreads();
}
}
// Write the data back to global memory.
for (int index = threadIdx.x; index < length; index += blockDim.x)
data[index] = dataBuffer[index];
}
/**
* An alternate kernel for sorting short lists. In this version every thread does a full
* scan through the data to select the destination for one element. This involves more
* work, but also parallelizes much better.
*/
__global__ void sortShortList2(const DATA_TYPE* __restrict__ dataIn, DATA_TYPE* __restrict__ dataOut, unsigned int length) {
extern __shared__ KEY_TYPE keyBuffer[];
int globalId = blockDim.x*blockIdx.x+threadIdx.x;
DATA_TYPE value = dataIn[globalId < length ? globalId : 0];
KEY_TYPE key = getValue(value);
int count = 0;
for (int blockStart = 0; blockStart < length; blockStart += blockDim.x) {
int numInBlock = min(static_cast<int>(blockDim.x), static_cast<int>(length-blockStart));
__syncthreads();
if (threadIdx.x < numInBlock)
keyBuffer[threadIdx.x] = getValue(dataIn[blockStart+threadIdx.x]);
__syncthreads();
for (int i = 0; i < numInBlock; i++) {
KEY_TYPE otherKey = keyBuffer[i];
count += (otherKey < key) | ((otherKey == key) & (blockStart+i < globalId));
}
}
if (globalId < length)
dataOut[count] = value;
}
inline __device__ void reduceMinMax(KEY_TYPE minimum, KEY_TYPE maximum, KEY_TYPE* minBuffer, KEY_TYPE* maxBuffer,
KEY_TYPE* minResult, KEY_TYPE* maxResult) {
minBuffer[threadIdx.x] = minimum;
maxBuffer[threadIdx.x] = maximum;
__syncthreads();
for (unsigned int step = 1; step < blockDim.x; step *= 2) {
if ((threadIdx.x+step < blockDim.x) & ((threadIdx.x&(2*step-1)) == 0)) {
minBuffer[threadIdx.x] = min(minBuffer[threadIdx.x], minBuffer[threadIdx.x+step]);
maxBuffer[threadIdx.x] = max(maxBuffer[threadIdx.x], maxBuffer[threadIdx.x+step]);
}
__syncthreads();
}
if (threadIdx.x == 0) {
*minResult = minBuffer[0];
*maxResult = maxBuffer[0];
}
}
/**
* Calculate the minimum and maximum value in the array to be sorted.
*/
__global__ void computeRange(const DATA_TYPE* __restrict__ data, unsigned int length, KEY_TYPE* __restrict__ range,
unsigned int numBuckets, unsigned int* __restrict__ bucketOffset, unsigned int* __restrict__ counters) {
#if UNIFORM
extern __shared__ KEY_TYPE minBuffer[];
KEY_TYPE* maxBuffer = minBuffer+blockDim.x;
KEY_TYPE minimum = MAX_KEY;
KEY_TYPE maximum = MIN_KEY;
__shared__ bool isLastFinishedBlock;
if (threadIdx.x == 0) {
isLastFinishedBlock = false;
}
// Each thread calculates the range of a subset of values.
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
KEY_TYPE value = getValue(data[index]);
minimum = min(minimum, value);
maximum = max(maximum, value);
}
// Now reduce them and save partial results
reduceMinMax(minimum, maximum, minBuffer, maxBuffer, &range[blockIdx.x * 2 + 0], &range[blockIdx.x * 2 + 1]);
__threadfence();
if (threadIdx.x == 0) {
isLastFinishedBlock = atomicAdd(&counters[0], 1) + 1 == gridDim.x;
}
__syncthreads();
// The last block reduce partial results
if (isLastFinishedBlock) {
for (unsigned int index = threadIdx.x; index < gridDim.x; index += blockDim.x) {
minimum = min(minimum, range[index * 2 + 0]);
maximum = max(maximum, range[index * 2 + 1]);
}
reduceMinMax(minimum, maximum, minBuffer, maxBuffer, &range[0], &range[1]);
}
#endif
// Clear the bucket counters in preparation for the next kernel.
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < numBuckets; index += blockDim.x*gridDim.x)
bucketOffset[index] = 0;
}
/**
* Assign elements to buckets. This version is optimized for uniformly distributed data.
*/
__global__ void assignElementsToBuckets(const DATA_TYPE* __restrict__ data, unsigned int length, unsigned int numBuckets, const KEY_TYPE* __restrict__ range,
unsigned int* __restrict__ bucketOffset, unsigned int* __restrict__ bucketOfElement, unsigned int* __restrict__ offsetInBucket) {
float minValue = (float) (range[0]);
float maxValue = (float) (range[1]);
float bucketWidth = (maxValue-minValue)/numBuckets;
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
float key = (float) getValue(data[index]);
unsigned int bucketIndex = min((unsigned int) ((key-minValue)/bucketWidth), numBuckets-1);
offsetInBucket[index] = atomicAdd(&bucketOffset[bucketIndex], 1);
bucketOfElement[index] = bucketIndex;
}
}
/**
* Assign elements to buckets. This version is optimized for non-uniformly distributed data.
*/
__global__ void assignElementsToBuckets2(const DATA_TYPE* __restrict__ data, unsigned int length, unsigned int numBuckets, const KEY_TYPE* __restrict__ range,
unsigned int* __restrict__ bucketOffset, unsigned int* __restrict__ bucketOfElement, unsigned int* __restrict__ offsetInBucket) {
// Load 64 datapoints and sort them to get an estimate of the data distribution.
__shared__ KEY_TYPE elements[64];
if (threadIdx.x < 64) {
int index = (int) (threadIdx.x*length/64.0);
elements[threadIdx.x] = getValue(data[index]);
}
__syncthreads();
for (unsigned int k = 2; k <= 64; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
if (threadIdx.x < 64) {
int ixj = threadIdx.x^j;
if (ixj > threadIdx.x) {
KEY_TYPE value1 = elements[threadIdx.x];
KEY_TYPE value2 = elements[ixj];
bool ascending = (threadIdx.x&k) == 0;
KEY_TYPE lowKey = (ascending ? value1 : value2);
KEY_TYPE highKey = (ascending ? value2 : value1);
if (lowKey > highKey) {
elements[threadIdx.x] = value2;
elements[ixj] = value1;
}
}
}
__syncthreads();
}
}
// Create a function composed of linear segments mapping data values to bucket indices.
__shared__ float segmentLowerBound[9];
__shared__ float segmentBaseIndex[9];
__shared__ float segmentIndexScale[9];
if (threadIdx.x == 0) {
segmentLowerBound[0] = elements[0]-0.2f*(elements[5]-elements[0]);
segmentLowerBound[1] = elements[5];
segmentLowerBound[2] = elements[10];
segmentLowerBound[3] = elements[20];
segmentLowerBound[4] = elements[30];
segmentLowerBound[5] = elements[40];
segmentLowerBound[6] = elements[50];
segmentLowerBound[7] = elements[60];
segmentLowerBound[8] = elements[63]+0.2f*(elements[63]-elements[58]);
segmentBaseIndex[0] = numBuckets/16;
segmentBaseIndex[1] = 3*numBuckets/16;
segmentBaseIndex[2] = 5*numBuckets/16;
segmentBaseIndex[3] = 7*numBuckets/16;
segmentBaseIndex[4] = 9*numBuckets/16;
segmentBaseIndex[5] = 11*numBuckets/16;
segmentBaseIndex[6] = 13*numBuckets/16;
segmentBaseIndex[7] = 15*numBuckets/16;
segmentBaseIndex[8] = numBuckets;
for (int i = 0; i < 8; i++)
if (segmentLowerBound[i+1] == segmentLowerBound[i])
segmentIndexScale[i] = 0;
else
segmentIndexScale[i] = (segmentBaseIndex[i+1]-segmentBaseIndex[i])/(segmentLowerBound[i+1]-segmentLowerBound[i]);
}
__syncthreads();
// Assign elements to buckets.
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
float key = (float) getValue(data[index]);
int segment;
for (segment = 0; segment < 7 && key > segmentLowerBound[segment+1]; segment++)
;
unsigned int bucketIndex = segmentBaseIndex[segment]+(key-segmentLowerBound[segment])*segmentIndexScale[segment];
bucketIndex = min(max(0, bucketIndex), numBuckets-1);
offsetInBucket[index] = atomicAdd(&bucketOffset[bucketIndex], 1);
bucketOfElement[index] = bucketIndex;
}
}
/**
* Sum the bucket sizes to compute the start position of each bucket. This kernel
* is executed as a single work group.
*/
__global__ __launch_bounds__(1024) void computeBucketPositions(unsigned int numBuckets, unsigned int* __restrict__ bucketOffset, unsigned int* __restrict__ counters) {
extern __shared__ unsigned int posBuffer[];
unsigned int globalOffset = 0;
for (unsigned int startBucket = 0; startBucket < numBuckets; startBucket += blockDim.x) {
// Load the bucket sizes into local memory.
unsigned int globalIndex = startBucket+threadIdx.x;
__syncthreads();
posBuffer[threadIdx.x] = (globalIndex < numBuckets ? bucketOffset[globalIndex] : 0);
__syncthreads();
// Perform a parallel prefix sum.
for (unsigned int step = 1; step < blockDim.x; step *= 2) {
unsigned int add = (threadIdx.x >= step ? posBuffer[threadIdx.x-step] : 0);
__syncthreads();
posBuffer[threadIdx.x] += add;
__syncthreads();
}
// Write the results back to global memory.
if (globalIndex < numBuckets)
bucketOffset[globalIndex] = posBuffer[threadIdx.x]+globalOffset;
globalOffset += posBuffer[blockDim.x-1];
}
if (threadIdx.x == 0) {
counters[0] = 0;
}
}
/**
* Copy the input data into the buckets for sorting.
*/
__global__ void copyDataToBuckets(const DATA_TYPE* __restrict__ data, DATA_TYPE* __restrict__ buckets, unsigned int length, const unsigned int* __restrict__ bucketOffset, const unsigned int* __restrict__ bucketOfElement, const unsigned int* __restrict__ offsetInBucket) {
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < length; index += blockDim.x*gridDim.x) {
DATA_TYPE element = data[index];
unsigned int bucketIndex = bucketOfElement[index];
unsigned int offset = (bucketIndex == 0 ? 0 : bucketOffset[bucketIndex-1]);
buckets[offset+offsetInBucket[index]] = element;
}
}
/**
* Sort the data in each bucket.
*/
__global__ void sortBuckets(DATA_TYPE* __restrict__ data, const DATA_TYPE* __restrict__ buckets, const unsigned int* __restrict__ bucketOffset) {
extern __shared__ DATA_TYPE dataBuffer[];
unsigned int index = blockIdx.x;
unsigned int startIndex = (index == 0 ? 0 : bucketOffset[index-1]);
unsigned int endIndex = bucketOffset[index];
unsigned int length = endIndex-startIndex;
unsigned int lenghtNextPow2 = length <= 2 ? length : (1 << (32 - __clz(length - 1)));
if (length <= blockDim.x) {
// Load the data into local memory.
if (threadIdx.x < length)
dataBuffer[threadIdx.x] = buckets[startIndex+threadIdx.x];
else if (threadIdx.x < lenghtNextPow2)
dataBuffer[threadIdx.x] = MAX_VALUE;
__syncthreads();
// Perform a bitonic sort in local memory.
for (unsigned int k = 2; k <= lenghtNextPow2; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
int ixj = threadIdx.x^j;
if (threadIdx.x < lenghtNextPow2 && ixj > threadIdx.x) {
DATA_TYPE value1 = dataBuffer[threadIdx.x];
DATA_TYPE value2 = dataBuffer[ixj];
bool ascending = (threadIdx.x&k) == 0;
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
dataBuffer[threadIdx.x] = value2;
dataBuffer[ixj] = value1;
}
}
__syncthreads();
}
}
// Write the data to the sorted array.
if (threadIdx.x < length)
data[startIndex+threadIdx.x] = dataBuffer[threadIdx.x];
}
else {
// Copy the bucket data over to the output array.
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x)
data[startIndex+i] = buckets[startIndex+i];
__syncthreads();
// Perform a bitonic sort in global memory.
for (unsigned int k = 2; k <= lenghtNextPow2; k *= 2) {
for (unsigned int j = k/2; j > 0; j /= 2) {
for (unsigned int i = threadIdx.x; i < length; i += blockDim.x) {
int ixj = i^j;
if (ixj > i && ixj < length) {
DATA_TYPE value1 = data[startIndex+i];
DATA_TYPE value2 = data[startIndex+ixj];
bool ascending = (__popc(~i & (lenghtNextPow2 - k)) & 1) == 0;
KEY_TYPE lowKey = (ascending ? getValue(value1) : getValue(value2));
KEY_TYPE highKey = (ascending ? getValue(value2) : getValue(value1));
if (lowKey > highKey) {
data[startIndex+i] = value2;
data[startIndex+ixj] = value1;
}
}
}
__syncthreads();
}
}
}
}
}
platforms/hip/src/kernels/utilities.hip
0 → 100644
View file @
3b8df952
extern "C" {
/**
* This is called by the various functions below to clear a buffer.
*/
__device__ void clearSingleBuffer(int* __restrict__ buffer, int size) {
int index = blockDim.x*blockIdx.x+threadIdx.x;
int4* buffer4 = (int4*) buffer;
int sizeDiv4 = size/4;
while (index < sizeDiv4) {
buffer4[index] = make_int4(0);
index += blockDim.x*gridDim.x;
}
if (blockDim.x*blockIdx.x+threadIdx.x == 0)
for (int i = sizeDiv4*4; i < size; i++)
buffer[i] = 0;
}
/**
* Fill a buffer with 0.
*/
__global__ void clearBuffer(int* __restrict__ buffer, int size) {
clearSingleBuffer(buffer, size);
}
/**
* Fill two buffers with 0.
*/
__global__ void clearTwoBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
}
/**
* Fill three buffers with 0.
*/
__global__ void clearThreeBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
}
/**
* Fill four buffers with 0.
*/
__global__ void clearFourBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
}
/**
* Fill five buffers with 0.
*/
__global__ void clearFiveBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
}
/**
* Fill six buffers with 0.
*/
__global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __restrict__ buffer2, int size2, int* __restrict__ buffer3, int size3, int* __restrict__ buffer4, int size4, int* __restrict__ buffer5, int size5, int* __restrict__ buffer6, int size6) {
clearSingleBuffer(buffer1, size1);
clearSingleBuffer(buffer2, size2);
clearSingleBuffer(buffer3, size3);
clearSingleBuffer(buffer4, size4);
clearSingleBuffer(buffer5, size5);
clearSingleBuffer(buffer6, size6);
}
/**
* Sum the energy buffer.
*/
__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
__syncthreads();
if (thread%(i*2) == 0 && thread+i < workGroupSize)
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
result[blockIdx.x] = tempBuffer[0];
}
/**
* Record the atomic charges into the posq array.
*/
__global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq, int* __restrict__ atomOrder, int numAtoms) {
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]];
}
}
platforms/hip/src/kernels/vectorOps.hip
0 → 100644
View file @
3b8df952
/**
* This file defines vector operations to simplify code elsewhere.
*/
// Versions of make_x() that take a single value and set all components to that.
inline __device__ int2 make_int2(int a) {
return make_int2(a, a);
}
inline __device__ int3 make_int3(int a) {
return make_int3(a, a, a);
}
inline __device__ int4 make_int4(int a) {
return make_int4(a, a, a, a);
}
inline __device__ float2 make_float2(float a) {
return make_float2(a, a);
}
inline __device__ float3 make_float3(float a) {
return make_float3(a, a, a);
}
inline __device__ float4 make_float4(float a) {
return make_float4(a, a, a, a);
}
inline __device__ double2 make_double2(double a) {
return make_double2(a, a);
}
inline __device__ double3 make_double3(double a) {
return make_double3(a, a, a);
}
inline __device__ double4 make_double4(double a) {
return make_double4(a, a, a, a);
}
// Multiply a vector by a constant.
inline __device__ int2 operator*(int2 a, int b) {
return make_int2(a.x*b, a.y*b);
}
inline __device__ int3 operator*(int3 a, int b) {
return make_int3(a.x*b, a.y*b, a.z*b);
}
inline __device__ int4 operator*(int4 a, int b) {
return make_int4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ int2 operator*(int a, int2 b) {
return make_int2(a*b.x, a*b.y);
}
inline __device__ int3 operator*(int a, int3 b) {
return make_int3(a*b.x, a*b.y, a*b.z);
}
inline __device__ int4 operator*(int a, int4 b) {
return make_int4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ float2 operator*(float2 a, float b) {
return make_float2(a.x*b, a.y*b);
}
inline __device__ float3 operator*(float3 a, float b) {
return make_float3(a.x*b, a.y*b, a.z*b);
}
inline __device__ float4 operator*(float4 a, float b) {
return make_float4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ float2 operator*(float a, float2 b) {
return make_float2(a*b.x, a*b.y);
}
inline __device__ float3 operator*(float a, float3 b) {
return make_float3(a*b.x, a*b.y, a*b.z);
}
inline __device__ float4 operator*(float a, float4 b) {
return make_float4(a*b.x, a*b.y, a*b.z, a*b.w);
}
inline __device__ double2 operator*(double2 a, double b) {
return make_double2(a.x*b, a.y*b);
}
inline __device__ double3 operator*(double3 a, double b) {
return make_double3(a.x*b, a.y*b, a.z*b);
}
inline __device__ double4 operator*(double4 a, double b) {
return make_double4(a.x*b, a.y*b, a.z*b, a.w*b);
}
inline __device__ double2 operator*(double a, double2 b) {
return make_double2(a*b.x, a*b.y);
}
inline __device__ double3 operator*(double a, double3 b) {
return make_double3(a*b.x, a*b.y, a*b.z);
}
inline __device__ double4 operator*(double a, double4 b) {
return make_double4(a*b.x, a*b.y, a*b.z, a*b.w);
}
// Divide a vector by a constant.
inline __device__ int2 operator/(int2 a, int b) {
return make_int2(a.x/b, a.y/b);
}
inline __device__ int3 operator/(int3 a, int b) {
return make_int3(a.x/b, a.y/b, a.z/b);
}
inline __device__ int4 operator/(int4 a, int b) {
return make_int4(a.x/b, a.y/b, a.z/b, a.w/b);
}
inline __device__ float2 operator/(float2 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ float3 operator/(float3 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ float4 operator/(float4 a, float b) {
float scale = 1.0f/b;
return a*scale;
}
inline __device__ double2 operator/(double2 a, double b) {
double scale = 1.0/b;
return a*scale;
}
inline __device__ double3 operator/(double3 a, double b) {
double scale = 1.0/b;
return a*scale;
}
inline __device__ double4 operator/(double4 a, double b) {
double scale = 1.0/b;
return a*scale;
}
// *= operator (multiply vector by constant)
inline __device__ void operator*=(int2& a, int b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(int3& a, int b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(int4& a, int b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(float2& a, float b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(float3& a, float b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(float4& a, float b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
inline __device__ void operator*=(double2& a, double b) {
a.x *= b; a.y *= b;
}
inline __device__ void operator*=(double3& a, double b) {
a.x *= b; a.y *= b; a.z *= b;
}
inline __device__ void operator*=(double4& a, double b) {
a.x *= b; a.y *= b; a.z *= b; a.w *= b;
}
// Dot product
inline __device__ float dot(float3 a, float3 b) {
return a.x*b.x+a.y*b.y+a.z*b.z;
}
inline __device__ double dot(double3 a, double3 b) {
return a.x*b.x+a.y*b.y+a.z*b.z;
}
// Cross product
inline __device__ float3 cross(float3 a, float3 b) {
return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
}
inline __device__ float4 cross(float4 a, float4 b) {
return make_float4(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x, 0.0f);
}
inline __device__ double3 cross(double3 a, double3 b) {
return make_double3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
}
inline __device__ double4 cross(double4 a, double4 b) {
return make_double4(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x, 0.0);
}
// Normalize a vector
inline __device__ float2 normalize(float2 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y);
}
inline __device__ float3 normalize(float3 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
}
inline __device__ float4 normalize(float4 a) {
return a*rsqrtf(a.x*a.x+a.y*a.y+a.z*a.z+a.w*a.w);
}
inline __device__ double2 normalize(double2 a) {
return a*rsqrt(a.x*a.x+a.y*a.y);
}
inline __device__ double3 normalize(double3 a) {
return a*rsqrt(a.x*a.x+a.y*a.y+a.z*a.z);
}
inline __device__ double4 normalize(double4 a) {
return a*rsqrt(a.x*a.x+a.y*a.y+a.z*a.z+a.w*a.w);
}
// Strip off the fourth component of a vector.
inline __device__ short3 trimTo3(short4 v) {
return make_short3(v.x, v.y, v.z);
}
inline __device__ int3 trimTo3(int4 v) {
return make_int3(v.x, v.y, v.z);
}
inline __device__ float3 trimTo3(float4 v) {
return make_float3(v.x, v.y, v.z);
}
inline __device__ double3 trimTo3(double4 v) {
return make_double3(v.x, v.y, v.z);
}
platforms/hip/tests/CMakeLists.txt
0 → 100644
View file @
3b8df952
#
# Testing
#
ENABLE_TESTING
()
SET
(
OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS TRUE CACHE BOOL
"Whether to build double precision versions of HIP test cases"
)
SET
(
INCLUDE_SERIALIZATION FALSE
)
#SET( INCLUDE_SERIALIZATION TRUE )
IF
(
INCLUDE_SERIALIZATION
)
INCLUDE_DIRECTORIES
(
${
OPENMM_DIR
}
/serialization/include
)
SET
(
SHARED_OPENMM_SERIALIZATION
"OpenMMSerialization"
)
ENDIF
(
INCLUDE_SERIALIZATION
)
# Automatically create tests using files named "Test*.cpp"
FILE
(
GLOB TEST_PROGS
"*Test*.cpp"
)
FOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
GET_FILENAME_COMPONENT
(
TEST_ROOT
${
TEST_PROG
}
NAME_WE
)
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_LINK_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
Single
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
IF
(
OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS
)
ADD_TEST
(
${
TEST_ROOT
}
Mixed
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
mixed
)
ADD_TEST
(
${
TEST_ROOT
}
Double
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
double
)
ENDIF
(
OPENMM_BUILD_HIP_DOUBLE_PRECISION_TESTS
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
platforms/hip/tests/HipTests.h
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015-2016 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "HipPlatform.h"
#include <string>
OpenMM
::
HipPlatform
platform
;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
if
(
argc
>
2
)
platform
.
setPropertyDefaultValue
(
"DeviceIndex"
,
std
::
string
(
argv
[
2
]));
}
platforms/hip/tests/TestHipATMForce.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestATMForce.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipAndersenThermostat.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestAndersenThermostat.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipBrownianIntegrator.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestBrownianIntegrator.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipCMAPTorsionForce.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCMAPTorsionForce.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipCMMotionRemover.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCMMotionRemover.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipCheckpoints.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCheckpoints.h"
void
testCheckpoint
()
{
const
int
numParticles
=
100
;
const
double
boxSize
=
5.0
;
const
double
temperature
=
200.0
;
System
system
;
system
.
addForce
(
new
AndersenThermostat
(
0.0
,
100.0
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
system
.
addForce
(
nonbonded
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
i
%
2
==
0
?
0.1
:
-
0.1
,
0.2
,
0.1
);
bool
clash
;
do
{
clash
=
false
;
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
sqrt
(
delta
.
dot
(
delta
))
<
0.1
)
clash
=
true
;
}
}
while
(
clash
);
}
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
context
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
);
// Run for a little while.
integrator
.
step
(
100
);
// Record the current state and make a checkpoint.
State
s1
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
stringstream
stream1
(
ios_base
::
out
|
ios_base
::
in
|
ios_base
::
binary
);
context
.
createCheckpoint
(
stream1
);
// Continue the simulation for a few more steps and record the state again.
integrator
.
step
(
10
);
State
s2
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
// Restore from the checkpoint and see if everything gets restored correctly.
context
.
setPeriodicBoxVectors
(
Vec3
(
2
*
boxSize
,
0
,
0
),
Vec3
(
0
,
2
*
boxSize
,
0
),
Vec3
(
0
,
0
,
2
*
boxSize
));
context
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
+
10
);
context
.
loadCheckpoint
(
stream1
);
State
s3
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s1
,
s3
);
// Now simulate from there and see if the trajectory is identical.
integrator
.
step
(
10
);
State
s4
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s2
,
s4
);
// Create a new Context that uses multiple devices.
string
deviceIndex
=
platform
.
getPropertyValue
(
context
,
HipPlatform
::
HipDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
HipPlatform
::
HipDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
VerletIntegrator
integrator2
(
0.001
);
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
context2
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
context2
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
);
// Now repeat all of the above tests with it.
integrator2
.
step
(
100
);
State
s5
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
stringstream
stream2
(
ios_base
::
out
|
ios_base
::
in
|
ios_base
::
binary
);
context2
.
createCheckpoint
(
stream2
);
integrator2
.
step
(
10
);
State
s6
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
context2
.
setPeriodicBoxVectors
(
Vec3
(
2
*
boxSize
,
0
,
0
),
Vec3
(
0
,
2
*
boxSize
,
0
),
Vec3
(
0
,
0
,
2
*
boxSize
));
context2
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
+
10
);
context2
.
loadCheckpoint
(
stream2
);
State
s7
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s5
,
s7
);
integrator2
.
step
(
10
);
State
s8
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s6
,
s8
);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator
integrator3
(
0.001
);
Context
context3
(
system
,
integrator3
,
platform
);
stream1
.
seekg
(
0
,
stream1
.
beg
);
context3
.
loadCheckpoint
(
stream1
);
State
s9
=
context3
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
|
State
::
Energy
);
compareStates
(
s1
,
s9
);
}
void
runPlatformTests
()
{
testCheckpoint
();
}
platforms/hip/tests/TestHipCompoundIntegrator.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCompoundIntegrator.h"
void
runPlatformTests
()
{
}
platforms/hip/tests/TestHipCustomAngleForce.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomAngleForce.h"
void
testParallelComputation
()
{
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomAngleForce
*
force
=
new
CustomAngleForce
(
"(theta-1.1)^2"
);
vector
<
double
>
params
;
for
(
int
i
=
2
;
i
<
numParticles
;
i
++
)
force
->
addAngle
(
i
-
2
,
i
-
1
,
i
,
params
);
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
i
,
i
%
2
,
0
);
VerletIntegrator
integrator1
(
0.01
);
Context
context1
(
system
,
integrator1
,
platform
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
string
deviceIndex
=
platform
.
getPropertyValue
(
context1
,
HipPlatform
::
HipDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
HipPlatform
::
HipDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
void
runPlatformTests
()
{
testParallelComputation
();
}
platforms/hip/tests/TestHipCustomBondForce.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomBondForce.h"
void
testParallelComputation
()
{
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomBondForce
*
force
=
new
CustomBondForce
((
"(r-1.1)^2"
));
vector
<
double
>
params
;
for
(
int
i
=
1
;
i
<
numParticles
;
i
++
)
force
->
addBond
(
i
-
1
,
i
,
params
);
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
VerletIntegrator
integrator1
(
0.01
);
Context
context1
(
system
,
integrator1
,
platform
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
string
deviceIndex
=
platform
.
getPropertyValue
(
context1
,
HipPlatform
::
HipDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
HipPlatform
::
HipDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
void
runPlatformTests
()
{
testParallelComputation
();
}
platforms/hip/tests/TestHipCustomCPPForce.cpp
0 → 100644
View file @
3b8df952
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2023 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "HipTests.h"
#include "TestCustomCPPForce.h"
void
runPlatformTests
()
{
}
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment