Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
aa6abbbe
Commit
aa6abbbe
authored
May 16, 2013
by
Yutong Zhao
Browse files
Added documentation for the GPU nonbonded algorithm.
parent
dd1ae5c1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
65 additions
and
5 deletions
+65
-5
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+65
-5
No files found.
platforms/cuda/src/kernels/nonbonded.cu
View file @
aa6abbbe
...
@@ -11,7 +11,64 @@ typedef struct {
...
@@ -11,7 +11,64 @@ typedef struct {
}
AtomData
;
}
AtomData
;
/**
/**
* Compute nonbonded interactions.
* Compute nonbonded interactions. The kernel is separated into two parts,
* tiles with exclusions and tiles without exclusions. It relies heavily on
* implicit warp-level synchronization. A tile is defined by two atom blocks
* each of warpsize. Each warp computes a range of tiles.
*
* Tiles with exclusions compute the entire set of interactions across
* atom blocks, equal to warpsize*warpsize. In order to avoid access conflicts
* the forces are computed and accumulated diagonally in the manner shown below
* where, suppose
*
* [a-h] comprise atom block 1, [i-p] comprise atom block 2
*
* 1 denotes the first set of calculations within the warp
* 2 denotes the second set of calculations within the warp
* ... etc.
*
* threads
* 0 1 2 3 4 5 6 7
* atom1
* L a b c d e f g h
* o i 1 2 3 4 5 6 7 8
* c j 8 1 2 3 4 5 6 7
* a k 7 8 1 2 3 4 5 6
* l l 6 7 8 1 2 3 4 5
* D m 5 6 7 8 1 2 3 4
* a n 4 5 6 7 8 1 2 3
* t o 3 4 5 6 7 8 1 2
* a p 2 3 4 5 6 7 8 1
*
* TODO: Implement shuffle as opposed to using nonbonded.
*
* Tiles without exclusions read off directly from the neighbourlist interactingAtoms
* and follows the same force accumulation method. If more there are more interactingTiles
* than the size of the neighbourlist initially allocated, the neighbourlist is rebuilt
* and the full tileset.
*
* [out]forceBuffers - forces on each atom to eventually be accumulated
* [out]energyBuffer - energyBuffer to eventually be accumulated
* [in]posq - x,y,z,charge
* [in]exclusions - 1024-bit flags denoting atom-atom exclusions for each tile
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing
* [in]ushort2 tiles - x component lists the tiles that interact with each tile
* - y component not used currently
* [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded
* [in]periodicBoxSize - size of the Periodic Box, last dimension (w) not used
* [in]invPeriodicBox - inverse of the periodicBoxSize, pre-computed for speed
* [in]blockCenter - the center of each block in euclidean coordinates
* [in]blockSize - size of the each block, radiating from the center
* - x is half the distance of total length
* - y is half the distance of total width
* - z is half the distance of total height
* - w is not used
* [in]interactingAtoms - a list of interactions within a given tile
*
*/
*/
extern
"C"
__global__
void
computeNonbonded
(
extern
"C"
__global__
void
computeNonbonded
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
...
@@ -22,9 +79,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -22,9 +79,9 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0.0
f
;
real
energy
=
0.0
f
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
...
@@ -162,6 +219,8 @@ extern "C" __global__ void computeNonbonded(
...
@@ -162,6 +219,8 @@ extern "C" __global__ void computeNonbonded(
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
excl
>>=
1
;
#endif
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
}
}
...
@@ -245,7 +304,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -245,7 +304,7 @@ extern "C" __global__ void computeNonbonded(
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
// Load atom data for this tile.
real4
posq1
=
posq
[
atom1
];
real4
posq1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
LOAD_ATOM1_PARAMETERS
...
@@ -257,6 +316,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -257,6 +316,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
atomIndices
[
threadIdx
.
x
]
=
j
;
if
(
j
<
PADDED_NUM_ATOMS
)
{
if
(
j
<
PADDED_NUM_ATOMS
)
{
// Load position of atom j from from global memory
real4
tempPosq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
x
=
tempPosq
.
x
;
localData
[
localAtomIndex
].
x
=
tempPosq
.
x
;
localData
[
localAtomIndex
].
y
=
tempPosq
.
y
;
localData
[
localAtomIndex
].
y
=
tempPosq
.
y
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment