Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c89cbcdb
Unverified
Commit
c89cbcdb
authored
Mar 14, 2018
by
peastman
Committed by
GitHub
Mar 14, 2018
Browse files
Merge pull request #2014 from peastman/groups
CustomNonbondedForce with interaction groups uses neighbor lists
parents
c6df2891
2df35b4e
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
390 additions
and
68 deletions
+390
-68
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+3
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+4
-4
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+12
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+65
-24
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+9
-4
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+75
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+3
-3
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+12
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+60
-23
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+9
-4
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+74
-2
tests/TestCustomNonbondedForce.h
tests/TestCustomNonbondedForce.h
+64
-1
No files found.
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
c89cbcdb
...
...
@@ -252,7 +252,9 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
// accumulate forces
double
dEdR
=
(
includeForce
?
data
.
forceExpression
.
evaluate
()
/
r
:
0.0
);
double
energy
=
(
includeEnergy
?
data
.
energyExpression
.
evaluate
()
:
0.0
);
double
energy
=
0.0
;
if
(
includeEnergy
||
(
useSwitch
&&
r
>
switchingDistance
))
energy
=
data
.
energyExpression
.
evaluate
();
double
switchValue
=
1.0
;
if
(
useSwitch
)
{
if
(
r
>
switchingDistance
)
{
...
...
platforms/cuda/include/CudaKernels.h
View file @
c89cbcdb
...
...
@@ -763,15 +763,15 @@ private:
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaArray
globals
;
CudaArray
interactionGroupData
;
CUfunction
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
CudaArray
interactionGroupData
,
filteredGroupData
,
numGroupTiles
;
CUfunction
interactionGroupKernel
,
prepareNeighborListKernel
,
buildNeighborListKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
,
prepareNeighborListArgs
,
buildNeighborListArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
,
useNeighborList
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
c89cbcdb
...
...
@@ -142,6 +142,11 @@ public:
* Get the maximum cutoff distance used by any force group.
*/
double
getMaxCutoffDistance
();
/**
* Given a nonbonded cutoff, get the padded cutoff distance used in computing
* the neighbor list.
*/
double
padCutoff
(
double
cutoff
);
/**
* Prepare to compute interactions. This updates the neighbor list.
*/
...
...
@@ -220,6 +225,13 @@ public:
CudaArray
&
getExclusionRowIndices
()
{
return
exclusionRowIndices
;
}
/**
* Get the array containing a flag for whether the neighbor list was rebuilt
* on the most recent call to prepareInteractions().
*
* @return a reference to the rebuildNeighborList member itself (no copy is made)
*/
CudaArray
&
getRebuildNeighborList
()
{
return
rebuildNeighborList
;
}
/**
* Get the index of the first tile this context is responsible for processing.
*/
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
c89cbcdb
...
...
@@ -54,6 +54,7 @@
#include "jama_eig.h"
#include <algorithm>
#include <cmath>
#include <iterator>
#include <set>
using
namespace
OpenMM
;
...
...
@@ -2429,7 +2430,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
vector
<
vector
<
int
>
>
atomLists
;
vector
<
pair
<
int
,
int
>
>
tiles
;
map<pair<int, int>, int> duplicateInteractions;
vector
<
int
>
tileGroup
;
vector
<
vector
<
int
>
>
duplicateAtomsForGroup
;
for
(
int
group
=
0
;
group
<
force
.
getNumInteractionGroups
();
group
++
)
{
// Get the list of atoms in this group and sort them.
...
...
@@ -2440,6 +2442,10 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
atoms2
.
insert
(
atoms2
.
begin
(),
set2
.
begin
(),
set2
.
end
());
sort
(
atoms1
.
begin
(),
atoms1
.
end
());
sort
(
atoms2
.
begin
(),
atoms2
.
end
());
duplicateAtomsForGroup
.
push_back
(
vector
<
int
>
());
set_intersection
(
set1
.
begin
(),
set1
.
end
(),
set2
.
begin
(),
set2
.
end
(),
inserter
(
duplicateAtomsForGroup
[
group
],
duplicateAtomsForGroup
[
group
].
begin
()));
sort
(
duplicateAtomsForGroup
[
group
].
begin
(),
duplicateAtomsForGroup
[
group
].
end
());
// Find how many tiles we will create for this group.
...
...
@@ -2451,9 +2457,12 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
// Add the tiles.
int
firstTile
=
tiles
.
size
();
for
(
int
i
=
0
;
i
<
numBlocks1
;
i
++
)
for (int j = 0; j < numBlocks2; j++)
for
(
int
j
=
0
;
j
<
numBlocks2
;
j
++
)
{
tiles
.
push_back
(
make_pair
(
atomLists
.
size
()
+
i
,
atomLists
.
size
()
+
numBlocks1
+
j
));
tileGroup
.
push_back
(
group
);
}
// Add the atom lists.
...
...
@@ -2473,22 +2482,6 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
atoms
.
push_back
(
atoms2
[
j
]);
atomLists
.
push_back
(
atoms
);
}
// If this group contains duplicate interactions, record that we need to skip them once.
for (int a1 : atoms1) {
if (set2.find(a1) == set2.end())
continue;
for (int j = 0; j < (int) atoms2.size() && atoms2[j] < a1; j++) {
int a2 = atoms2[j];
if (set1.find(a2) != set1.end()) {
pair<int, int> key = make_pair(a2, a1);
if (duplicateInteractions.find(key) == duplicateInteractions.end())
duplicateInteractions[key] = 0;
duplicateInteractions[key]++;
}
}
}
}
// Build a lookup table for quickly identifying excluded interactions.
...
...
@@ -2506,15 +2499,18 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
vector
<
vector
<
int
>
>
exclusionFlags
(
tiles
.
size
());
vector
<
pair
<
int
,
int
>
>
tileOrder
;
for
(
int
tile
=
0
;
tile
<
tiles
.
size
();
tile
++
)
{
bool
swapped
=
false
;
if
(
atomLists
[
tiles
[
tile
].
first
].
size
()
<
atomLists
[
tiles
[
tile
].
second
].
size
())
{
// For efficiency, we want the first axis to be the larger one.
int
swap
=
tiles
[
tile
].
first
;
tiles
[
tile
].
first
=
tiles
[
tile
].
second
;
tiles
[
tile
].
second
=
swap
;
swapped
=
true
;
}
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
vector
<
int
>&
duplicateAtoms
=
duplicateAtomsForGroup
[
tileGroup
[
tile
]];
vector
<
int
>
flags
(
atoms1
.
size
(),
(
int
)
(
1LL
<<
atoms2
.
size
())
-
1
);
int
numExcluded
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
atoms1
.
size
();
i
++
)
...
...
@@ -2525,11 +2521,10 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
pair
<
int
,
int
>
key
=
make_pair
(
min
(
a1
,
a2
),
max
(
a1
,
a2
));
if
(
a1
==
a2
||
exclusions
.
find
(
key
)
!=
exclusions
.
end
())
isExcluded
=
true
;
// This is an excluded interaction.
else if (
duplicateInteractions.find(key) != duplicateInteractions.end() && duplicateInteractions[key] > 0
) {
else
if
(
(
a1
>
a2
)
==
swapped
&&
binary_search
(
duplicateAtoms
.
begin
(),
duplicateAtoms
.
end
(),
a1
)
&&
binary_search
(
duplicateAtoms
.
begin
(),
duplicateAtoms
.
end
(),
a2
)
)
{
// Both atoms are in both sets, so skip duplicate interactions.
isExcluded
=
true
;
duplicateInteractions[key]--;
}
if
(
isExcluded
)
{
flags
[
i
]
&=
-
1
-
(
1
<<
j
);
...
...
@@ -2584,6 +2579,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
}
interactionGroupData
.
initialize
<
int4
>
(
cu
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
.
upload
(
groupData
);
numGroupTiles
.
initialize
<
int
>
(
cu
,
1
,
"numGroupTiles"
);
// Allocate space for a neighbor list, if necessary.
if
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
&&
groupData
.
size
()
>
cu
.
getNumThreadBlocks
())
{
filteredGroupData
.
initialize
<
int4
>
(
cu
,
groupData
.
size
(),
"filteredGroupData"
);
interactionGroupData
.
copyTo
(
filteredGroupData
);
int
numTiles
=
groupData
.
size
()
/
32
;
numGroupTiles
.
upload
(
&
numTiles
);
}
// Create the kernel.
...
...
@@ -2662,11 +2667,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines["LOCAL_MEMORY_SIZE"] = cu.intToString(max(32, cu.getNonbondedUtilities().getForceThreadBlockSize()));
int
localMemorySize
=
max
(
32
,
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
defines
[
"LOCAL_MEMORY_SIZE"
]
=
cu
.
intToString
(
localMemorySize
);
defines
[
"WARPS_IN_BLOCK"
]
=
cu
.
intToString
(
localMemorySize
/
32
);
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
double
paddedCutoff
=
cu
.
getNonbondedUtilities
().
padCutoff
(
cutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"TILE_SIZE"
]
=
"32"
;
defines
[
"NUM_TILES"
]
=
cu
.
intToString
(
numTileSets
);
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cu
.
getContextIndex
()
*
numTileSets
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numTileSets
/
numContexts
;
...
...
@@ -2674,12 +2684,19 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines
[
"LAST_TILE"
]
=
cu
.
intToString
(
endIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
cu
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
CUmodule program = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customNonbondedGroups, replacements), defines);
interactionGroupKernel = cu.getKernel(program, "computeInteractionGroups");
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbondedGroups
,
replacements
),
defines
);
interactionGroupKernel
=
cu
.
getKernel
(
module
,
"computeInteractionGroups"
);
prepareNeighborListKernel
=
cu
.
getKernel
(
module
,
"prepareToBuildNeighborList"
);
buildNeighborListKernel
=
cu
.
getKernel
(
module
,
"buildNeighborList"
);
numGroupThreadBlocks
=
cu
.
getNonbondedUtilities
().
getNumForceThreadBlocks
();
}
double
CudaCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
useNeighborList
=
(
filteredGroupData
.
isInitialized
()
&&
cu
.
getNonbondedUtilities
().
getUseCutoff
());
if
(
useNeighborList
&&
cu
.
getContextIndex
()
>
0
)
{
// When using a neighbor list, run the whole calculation on a single device.
return
0.0
;
}
if
(
globals
.
isInitialized
())
{
bool
changed
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
...
...
@@ -2706,7 +2723,9 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
interactionGroupArgs.push_back(&interactionGroupData.getDevicePointer());
interactionGroupArgs
.
push_back
(
&
(
useNeighborList
?
filteredGroupData
:
interactionGroupData
).
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
useNeighborList
);
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
...
...
@@ -2720,8 +2739,30 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs
.
push_back
(
&
globals
.
getDevicePointer
());
if
(
hasParamDerivs
)
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
if
(
useNeighborList
)
{
// Initialize kernels for building the interaction group neighbor list.
prepareNeighborListArgs
.
push_back
(
&
cu
.
getNonbondedUtilities
().
getRebuildNeighborList
().
getDevicePointer
());
prepareNeighborListArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
cu
.
getNonbondedUtilities
().
getRebuildNeighborList
().
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
interactionGroupData
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
filteredGroupData
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecYPointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecZPointer
());
}
}
int
forceThreadBlockSize
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
if
(
useNeighborList
)
{
// Rebuild the neighbor list, if necessary.
cu
.
executeKernel
(
prepareNeighborListKernel
,
&
prepareNeighborListArgs
[
0
],
1
,
1
);
cu
.
executeKernel
(
buildNeighborListKernel
,
&
buildNeighborListArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
cu
.
executeKernel
(
interactionGroupKernel
,
&
interactionGroupArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
c89cbcdb
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -266,6 +266,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
2
,
0
);
interactionCount
.
upload
(
count
);
rebuildNeighborList
.
upload
(
count
);
}
// Record arguments for kernels.
...
...
@@ -351,6 +352,11 @@ double CudaNonbondedUtilities::getMaxCutoffDistance() {
return
cutoff
;
}
/**
 * Given a nonbonded cutoff, compute the padded cutoff distance used when
 * building the neighbor list.
 *
 * @param cutoff  the unpadded cutoff distance
 * @return the cutoff plus 10% of itself when usePadding is set, otherwise
 *         the cutoff unchanged
 */
double CudaNonbondedUtilities::padCutoff(double cutoff) {
    double padding = 0.0;
    if (usePadding)
        padding = 0.1*cutoff;
    return cutoff+padding;
}
void
CudaNonbondedUtilities
::
prepareInteractions
(
int
forceGroups
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
...
...
@@ -462,13 +468,12 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
source
=
source
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
double
paddedCutoff
=
padCutoff
(
cutoff
);
map
<
string
,
string
>
defines
;
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
ing
);
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
edCutoff
-
cutoff
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
View file @
c89cbcdb
...
...
@@ -10,6 +10,7 @@ typedef struct {
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
int
*
__restrict__
numGroupTiles
,
bool
useNeighborList
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
...
@@ -20,8 +21,8 @@ extern "C" __global__ void computeInteractionGroups(
INIT_DERIVATIVES
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
endTile
=
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
startTile
=
(
useNeighborList
?
warp
*
numGroupTiles
[
0
]
/
totalWarps
:
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
)
;
const
unsigned
int
endTile
=
(
useNeighborList
?
(
warp
+
1
)
*
numGroupTiles
[
0
]
/
totalWarps
:
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
)
;
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int
atom1
=
atomData
.
x
;
...
...
@@ -86,3 +87,75 @@ extern "C" __global__ void computeInteractionGroups(
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_DERIVATIVES
}
/**
 * Reset the filtered tile count to 0 when the neighbor list is flagged for
 * rebuilding, so that buildNeighborList() can append tiles starting from an
 * empty list. When no rebuild is flagged the count is left untouched.
 *
 * This is meant to be executed by a single thread.
 *
 * @param rebuildNeighborList  element 0 is 1 when a rebuild is required
 * @param numGroupTiles        element 0 holds the number of filtered tiles
 */
extern "C" __global__ void prepareToBuildNeighborList(int* __restrict__ rebuildNeighborList, int* __restrict__ numGroupTiles) {
    const bool rebuildRequested = (rebuildNeighborList[0] == 1);
    if (!rebuildRequested)
        return;
    numGroupTiles[0] = 0;
}
/**
 * Filter the list of interaction group tiles, keeping only those that contain at
 * least one non-excluded pair of atoms closer than the padded cutoff.
 *
 * Each warp handles a contiguous range of the NUM_TILES input tiles. For every tile
 * that passes the test, a slot is reserved with atomicAdd() on numGroupTiles[0] and
 * the tile's TILE_SIZE entries are copied into filteredGroupData at that slot.
 *
 * @param rebuildNeighborList  element 0 is 0 when no rebuild is needed on this step
 * @param numGroupTiles        element 0 counts the tiles written to filteredGroupData
 * @param posq                 atom positions, indexed by atom index
 * @param groupData            unfiltered tile data: x and y are atom indices, z packs
 *                             the inner loop range (low 16 bits = start, next 16 bits = end),
 *                             w is a bit mask in which a cleared bit marks an excluded pair
 * @param filteredGroupData    output list of tiles that have interactions
 * @param periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
 *                             periodic box description; presumably consumed by
 *                             APPLY_PERIODIC_TO_DELTA when USE_PERIODIC is defined
 */
extern "C" __global__ void buildNeighborList(int* __restrict__ rebuildNeighborList, int* __restrict__ numGroupTiles,
        const real4* __restrict__ posq, const int4* __restrict__ groupData, int4* __restrict__ filteredGroupData,
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    // Nothing to do unless a rebuild was requested for this step.
    if (rebuildNeighborList[0] == 0)
        return;

    const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // warp index across the whole grid
    const unsigned int local_warp = threadIdx.x/TILE_SIZE;                   // warp index inside this block
    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);                    // this thread's index inside its warp
    const unsigned int tbx = threadIdx.x - tgx;                              // block-local index of the warp's first thread
    __shared__ real4 localPos[LOCAL_MEMORY_SIZE];
    __shared__ volatile bool anyInteraction[WARPS_IN_BLOCK];
    __shared__ volatile int tileIndex[WARPS_IN_BLOCK];

    // Divide the input tiles evenly between the warps.
    const unsigned int startTile = warp*NUM_TILES/totalWarps;
    const unsigned int endTile = (warp+1)*NUM_TILES/totalWarps;
    for (int tile = startTile; tile < endTile; tile++) {
        // Unpack this thread's entry of the tile.
        const int4 atomData = groupData[TILE_SIZE*tile+tgx];
        const int atom1 = atomData.x;
        const int atom2 = atomData.y;
        const int packedRange = atomData.z;
        const int rangeStart = packedRange&0xFFFF;
        const int rangeEnd = (packedRange>>16)&0xFFFF;
        const int exclusions = atomData.w;

        // Keep the first atom's position locally and publish the second atom's
        // position so the other threads of the warp can read it.
        real4 posq1 = posq[atom1];
        localPos[threadIdx.x] = posq[atom2];
        if (tgx == 0)
            anyInteraction[local_warp] = false;
        int tj = tgx;
        SYNC_WARPS;

        // Walk the range, stopping as soon as any thread of the warp has found
        // an interaction within the padded cutoff.
        // NOTE(review): the loop condition reads anyInteraction without a barrier
        // at the top of the body; confirm this is safe on hardware where the
        // threads of a warp can make progress independently.
        for (int j = rangeStart; j < rangeEnd && !anyInteraction[local_warp]; j++) {
            if (tj < rangeEnd) {
                const bool interactionAllowed = (((exclusions>>tj)&1) != 0);
                const int localIndex = tbx+tj;
                const real4 posq2 = localPos[localIndex];
                real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                if (interactionAllowed && r2 < PADDED_CUTOFF_SQUARED)
                    anyInteraction[local_warp] = true;
            }

            // Advance to the next partner, wrapping back to the start of the range.
            if (tj == rangeEnd-1)
                tj = rangeStart;
            else
                tj = tj+1;
            SYNC_WARPS;
        }

        if (anyInteraction[local_warp]) {
            // One thread reserves an output slot, then the whole warp copies the tile into it.
            SYNC_WARPS;
            if (tgx == 0)
                tileIndex[local_warp] = atomicAdd(numGroupTiles, 1);
            SYNC_WARPS;
            filteredGroupData[TILE_SIZE*tileIndex[local_warp]+tgx] = atomData;
        }
    }
}
platforms/opencl/include/OpenCLKernels.h
View file @
c89cbcdb
...
...
@@ -742,15 +742,15 @@ private:
ForceInfo
*
info
;
OpenCLParameterSet
*
params
;
OpenCLArray
globals
;
OpenCLArray
interactionGroupData
;
cl
::
Kernel
interactionGroupKernel
;
OpenCLArray
interactionGroupData
,
filteredGroupData
,
numGroupTiles
;
cl
::
Kernel
interactionGroupKernel
,
prepareNeighborListKernel
,
buildNeighborListKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
,
useNeighborList
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
c89cbcdb
...
...
@@ -153,6 +153,11 @@ public:
bool
getHasInteractions
()
{
return
(
groupCutoff
.
size
()
>
0
);
}
/**
* Given a nonbonded cutoff, get the padded cutoff distance used in computing
* the neighbor list.
*/
double
padCutoff
(
double
cutoff
);
/**
* Prepare to compute interactions. This updates the neighbor list.
*/
...
...
@@ -225,6 +230,13 @@ public:
OpenCLArray
&
getExclusionRowIndices
()
{
return
exclusionRowIndices
;
}
/**
* Get the array containing a flag for whether the neighbor list was rebuilt
* on the most recent call to prepareInteractions().
*
* @return a reference to the rebuildNeighborList member itself (no copy is made)
*/
OpenCLArray
&
getRebuildNeighborList
()
{
return
rebuildNeighborList
;
}
/**
* Get the index of the first tile this context is responsible for processing.
*/
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
c89cbcdb
...
...
@@ -54,6 +54,7 @@
#include "jama_eig.h"
#include <algorithm>
#include <cmath>
#include <iterator>
#include <set>
using namespace OpenMM;
...
...
@@ -2550,7 +2551,8 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
vector<vector<int> > atomLists;
vector<pair<int, int> > tiles;
map<pair<int, int>, int> duplicateInteractions;
vector<int> tileGroup;
vector<vector<int> > duplicateAtomsForGroup;
for (int group = 0; group < force.getNumInteractionGroups(); group++) {
// Get the list of atoms in this group and sort them.
...
...
@@ -2561,6 +2563,10 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
atoms2.insert(atoms2.begin(), set2.begin(), set2.end());
sort(atoms1.begin(), atoms1.end());
sort(atoms2.begin(), atoms2.end());
duplicateAtomsForGroup.push_back(vector<int>());
set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(),
inserter(duplicateAtomsForGroup[group], duplicateAtomsForGroup[group].begin()));
sort(duplicateAtomsForGroup[group].begin(), duplicateAtomsForGroup[group].end());
// Find how many tiles we will create for this group.
...
...
@@ -2572,9 +2578,12 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
// Add the tiles.
int firstTile = tiles.size();
for (int i = 0; i < numBlocks1; i++)
for (int j = 0; j < numBlocks2; j++)
for (int j = 0; j < numBlocks2; j++)
{
tiles.push_back(make_pair(atomLists.size()+i, atomLists.size()+numBlocks1+j));
tileGroup.push_back(group);
}
// Add the atom lists.
...
...
@@ -2594,22 +2603,6 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
atoms.push_back(atoms2[j]);
atomLists.push_back(atoms);
}
// If this group contains duplicate interactions, record that we need to skip them once.
for (int a1 : atoms1) {
if (set2.find(a1) == set2.end())
continue;
for (int j = 0; j < (int) atoms2.size() && atoms2[j] < a1; j++) {
int a2 = atoms2[j];
if (set1.find(a2) != set1.end()) {
pair<int, int> key = make_pair(a2, a1);
if (duplicateInteractions.find(key) == duplicateInteractions.end())
duplicateInteractions[key] = 0;
duplicateInteractions[key]++;
}
}
}
}
// Build a lookup table for quickly identifying excluded interactions.
...
...
@@ -2627,15 +2620,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
vector<vector<int> > exclusionFlags(tiles.size());
vector<pair<int, int> > tileOrder;
for (int tile = 0; tile < tiles.size(); tile++) {
bool swapped = false;
if (atomLists[tiles[tile].first].size() < atomLists[tiles[tile].second].size()) {
// For efficiency, we want the first axis to be the larger one.
int swap = tiles[tile].first;
tiles[tile].first = tiles[tile].second;
tiles[tile].second = swap;
swapped = true;
}
vector<int>& atoms1 = atomLists[tiles[tile].first];
vector<int>& atoms2 = atomLists[tiles[tile].second];
vector<int>& duplicateAtoms = duplicateAtomsForGroup[tileGroup[tile]];
vector<int> flags(atoms1.size(), (int) (1LL<<atoms2.size())-1);
int numExcluded = 0;
for (int i = 0; i < (int) atoms1.size(); i++)
...
...
@@ -2646,11 +2642,10 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
pair<int, int> key = make_pair(min(a1, a2), max(a1, a2));
if (a1 == a2 || exclusions.find(key) != exclusions.end())
isExcluded = true; // This is an excluded interaction.
else if (
duplicateInteractions.find(key) != duplicateInteractions.end() && duplicateInteractions[key] > 0
) {
else if (
(a1 > a2) == swapped && binary_search(duplicateAtoms.begin(), duplicateAtoms.end(), a1) && binary_search(duplicateAtoms.begin(), duplicateAtoms.end(), a2)
) {
// Both atoms are in both sets, so skip duplicate interactions.
isExcluded = true;
duplicateInteractions[key]--;
}
if (isExcluded) {
flags[i] &= -1-(1<<j);
...
...
@@ -2713,6 +2708,16 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
}
interactionGroupData.initialize<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData.upload(groupData);
numGroupTiles.initialize<cl_int>(cl, 1, "numGroupTiles");
// Allocate space for a neighbor list, if necessary.
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && groupData.size() > cl.getNumThreadBlocks()) {
filteredGroupData.initialize<mm_int4>(cl, groupData.size(), "filteredGroupData");
interactionGroupData.copyTo(filteredGroupData);
int numTiles = groupData.size()/32;
numGroupTiles.upload(&numTiles);
}
// Create the kernel.
...
...
@@ -2791,11 +2796,16 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1";
defines["LOCAL_MEMORY_SIZE"] = cl.intToString(max(32, cl.getNonbondedUtilities().getForceThreadBlockSize()));
int localMemorySize = max(32, cl.getNonbondedUtilities().getForceThreadBlockSize());
defines["LOCAL_MEMORY_SIZE"] = cl.intToString(localMemorySize);
defines["WARPS_IN_BLOCK"] = cl.intToString(localMemorySize/32);
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
double paddedCutoff = cl.getNonbondedUtilities().padCutoff(cutoff);
defines["PADDED_CUTOFF_SQUARED"] = cl.doubleToString(paddedCutoff*paddedCutoff);
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["TILE_SIZE"] = "32";
defines["NUM_TILES"] = cl.intToString(numTileSets);
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*numTileSets/numContexts;
int endIndex = (cl.getContextIndex()+1)*numTileSets/numContexts;
...
...
@@ -2805,10 +2815,17 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customNonbondedGroups, replacements), defines);
interactionGroupKernel = cl::Kernel(program, "computeInteractionGroups");
prepareNeighborListKernel = cl::Kernel(program, "prepareToBuildNeighborList");
buildNeighborListKernel = cl::Kernel(program, "buildNeighborList");
numGroupThreadBlocks = cl.getNonbondedUtilities().getNumForceThreadBlocks();
}
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
useNeighborList = (filteredGroupData.isInitialized() && cl.getNonbondedUtilities().getUseCutoff());
if (useNeighborList && cl.getContextIndex() > 0) {
// When using a neighbor list, run the whole calculation on a single device.
return 0.0;
}
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
...
...
@@ -2837,7 +2854,9 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer() : cl.getForceBuffers()).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData.getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, (useNeighborList ? filteredGroupData : interactionGroupData).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, numGroupTiles.getDeviceBuffer());
interactionGroupKernel.setArg<cl_int>(index++, useNeighborList);
index += 5;
for (auto& buffer : params->getBuffers())
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
...
...
@@ -2847,9 +2866,27 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, globals.getDeviceBuffer());
if (hasParamDerivs)
interactionGroupKernel.setArg<cl::Memory>(index++, cl.getEnergyParamDerivBuffer().getDeviceBuffer());
if (useNeighborList) {
// Initialize kernels for building the interaction group neighbor list.
prepareNeighborListKernel.setArg<cl::Buffer>(0, cl.getNonbondedUtilities().getRebuildNeighborList().getDeviceBuffer());
prepareNeighborListKernel.setArg<cl::Buffer>(1, numGroupTiles.getDeviceBuffer());
buildNeighborListKernel.setArg<cl::Buffer>(0, cl.getNonbondedUtilities().getRebuildNeighborList().getDeviceBuffer());
buildNeighborListKernel.setArg<cl::Buffer>(1, numGroupTiles.getDeviceBuffer());
buildNeighborListKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
buildNeighborListKernel.setArg<cl::Buffer>(3, interactionGroupData.getDeviceBuffer());
buildNeighborListKernel.setArg<cl::Buffer>(4, filteredGroupData.getDeviceBuffer());
}
}
setPeriodicBoxArgs(cl, interactionGroupKernel, 4);
int forceThreadBlockSize = max(32, cl.getNonbondedUtilities().getForceThreadBlockSize());
if (useNeighborList) {
// Rebuild the neighbor list, if necessary.
setPeriodicBoxArgs(cl, buildNeighborListKernel, 5);
cl.executeKernel(prepareNeighborListKernel, 1, 1);
cl.executeKernel(buildNeighborListKernel, numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
setPeriodicBoxArgs(cl, interactionGroupKernel, 6);
cl.executeKernel(interactionGroupKernel, numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
c89cbcdb
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -296,6 +296,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
blockSorter
=
new
OpenCLSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
cl_uint
>
count
(
1
,
0
);
interactionCount
.
upload
(
count
);
rebuildNeighborList
.
upload
(
count
);
}
}
...
...
@@ -323,6 +324,11 @@ double OpenCLNonbondedUtilities::getMaxCutoffDistance() {
return
cutoff
;
}
double
OpenCLNonbondedUtilities
::
padCutoff
(
double
cutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
return
cutoff
+
padding
;
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
(
int
forceGroups
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
...
...
@@ -464,12 +470,11 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
cutoffDistance
=
cutoff
;
kernels
.
source
=
source
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
double
paddedCutoff
=
padCutoff
(
cutoff
);
map
<
string
,
string
>
defines
;
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
OpenCLContext
::
TileSize
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
ing
);
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
edCutoff
-
cutoff
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
c89cbcdb
...
...
@@ -43,6 +43,7 @@ __kernel void computeInteractionGroups(
__global
real4*
restrict
forceBuffers,
#
endif
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
int*
restrict
numGroupTiles,
int
useNeighborList,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
...
...
@@ -53,8 +54,8 @@ __kernel void computeInteractionGroups(
INIT_DERIVATIVES
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
const
unsigned
int
endTile
=
FIRST_TILE+
(
warp+1
)
*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
const
unsigned
int
startTile
=
(
useNeighborList
?
warp*numGroupTiles[0]/totalWarps
:
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
)
;
const
unsigned
int
endTile
=
(
useNeighborList
?
(
warp+1
)
*numGroupTiles[0]/totalWarps
:
FIRST_TILE+
(
warp+1
)
*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
)
;
for
(
int
tile
=
startTile
; tile < endTile; tile++) {
const
int4
atomData
=
groupData[TILE_SIZE*tile+tgx]
;
const
int
atom1
=
atomData.x
;
...
...
@@ -129,3 +130,74 @@ __kernel void computeInteractionGroups(
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
SAVE_DERIVATIVES
}
/**
 * Reset the count of filtered interaction group tiles to zero when the
 * rebuild flag is set, so the list can be regenerated from scratch.
 * This kernel is executed by a single thread.
 */
__kernel void prepareToBuildNeighborList(__global int* restrict rebuildNeighborList, __global int* restrict numGroupTiles) {
    const bool rebuildRequested = (rebuildNeighborList[0] == 1);
    if (rebuildRequested)
        numGroupTiles[0] = 0;
}
/**
 * Copy into filteredGroupData only those interaction group tiles that contain
 * at least one non-excluded atom pair closer than the padded cutoff.
 *
 * Each tile is TILE_SIZE consecutive int4 entries of groupData, one per thread
 * of a warp: x and y are the two atom indices handled by the thread, z packs
 * the start (low 16 bits) and end (next 16 bits) of the range of tile slots to
 * test against, and w is a bit mask in which a set bit marks a slot that is
 * NOT excluded.  numGroupTiles[0] is incremented atomically for every tile kept.
 */
__kernel void buildNeighborList(__global int* restrict rebuildNeighborList, __global int* restrict numGroupTiles,
        __global const real4* restrict posq, __global const int4* restrict groupData, __global int4* restrict filteredGroupData,
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    // Nothing to do on steps where no rebuild has been requested.
    if (rebuildNeighborList[0] == 0)
        return;

    const unsigned int numWarps = get_global_size(0)/TILE_SIZE;
    const unsigned int globalWarp = get_global_id(0)/TILE_SIZE;   // index of this warp over the whole launch
    const unsigned int blockWarp = get_local_id(0)/TILE_SIZE;     // index of this warp within its work-group
    const unsigned int lane = get_local_id(0) & (TILE_SIZE-1);    // index of this thread within its warp
    const unsigned int warpBase = get_local_id(0) - lane;         // local id of the first thread of this warp
    __local real4 localPos[LOCAL_MEMORY_SIZE];
    __local volatile bool anyInteraction[WARPS_IN_BLOCK];
    __local volatile int tileIndex[WARPS_IN_BLOCK];

    // Divide the unfiltered tiles evenly between the warps.
    const unsigned int firstTile = globalWarp*NUM_TILES/numWarps;
    const unsigned int lastTile = (globalWarp+1)*NUM_TILES/numWarps;
    for (int tile = firstTile; tile < lastTile; tile++) {
        const int4 atomData = groupData[TILE_SIZE*tile+lane];
        const int atom1 = atomData.x;
        const int atom2 = atomData.y;
        const int rangeStart = atomData.z&0xFFFF;
        const int rangeEnd = (atomData.z>>16)&0xFFFF;
        const int exclusions = atomData.w;
        real4 posq1 = posq[atom1];
        localPos[get_local_id(0)] = posq[atom2];
        if (lane == 0)
            anyInteraction[blockWarp] = false;
        int tj = lane;
        SYNC_WARPS;

        // Each thread starts at its own slot and walks cyclically through
        // [rangeStart, rangeEnd); the loop ends early once any thread of the
        // warp has flagged an interaction.
        for (int j = rangeStart; j < rangeEnd && !anyInteraction[blockWarp]; j++) {
            if (tj < rangeEnd) {
                const bool isIncluded = (((exclusions>>tj)&1) != 0);
                int localIndex = warpBase+tj;
                real4 delta = (real4) (localPos[localIndex].xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                if (isIncluded && r2 < PADDED_CUTOFF_SQUARED)
                    anyInteraction[blockWarp] = true;
            }
            if (tj == rangeEnd-1)
                tj = rangeStart;
            else
                tj = tj+1;
            SYNC_WARPS;
        }

        if (anyInteraction[blockWarp]) {
            // The first thread of the warp reserves the output slot, then
            // every thread copies its own entry of the tile into that slot.
            SYNC_WARPS;
            if (lane == 0)
                tileIndex[blockWarp] = atomic_add(numGroupTiles, 1);
            SYNC_WARPS;
            filteredGroupData[TILE_SIZE*tileIndex[blockWarp]+lane] = atomData;
        }
    }
}
tests/TestCustomNonbondedForce.h
View file @
c89cbcdb
...
...
@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -978,6 +978,68 @@ void testInteractionGroupTabulatedFunction() {
}
}
/**
 * Build a periodic system of randomly placed particles with a NonbondedForce
 * and a CustomNonbondedForce that uses a cutoff, a switching function and a
 * single interaction group, then run dynamics and check that the total energy
 * stays constant.
 */
void testInteractionGroupWithCutoff() {
    const int numParticles = 1000;
    const double boxSize = 10.0;
    System system;
    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
    NonbondedForce* standard = new NonbondedForce();
    CustomNonbondedForce* custom = new CustomNonbondedForce("100/(r+0.1)");
    system.addForce(standard);
    system.addForce(custom);

    // Configure the standard force.
    standard->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
    standard->setCutoffDistance(1.0);
    standard->setUseSwitchingFunction(true);
    standard->setSwitchingDistance(0.9);

    // Configure the custom force.
    custom->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
    custom->setCutoffDistance(1.0);
    custom->setUseSwitchingFunction(true);
    custom->setSwitchingDistance(0.8);

    // Place the particles at random, redrawing any position that lands
    // within 0.5 of a previously placed particle.
    vector<Vec3> positions(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    for (int i = 0; i < numParticles; i++) {
        system.addParticle(10.0);
        standard->addParticle(0.0, 0.2, 0.1);
        custom->addParticle();
        bool tooClose;
        do {
            positions[i] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt))*boxSize;
            tooClose = false;
            for (int j = 0; j < i; j++) {
                Vec3 delta = positions[i]-positions[j];
                if (delta.dot(delta) < 0.5*0.5)
                    tooClose = true;
            }
        } while (tooClose);
    }

    // The interaction group pairs the first ten even-numbered particles with every particle.
    set<int> set1, set2;
    for (int i = 0; i < 20; i += 2)
        set1.insert(i);
    for (int i = 0; i < numParticles; i++)
        set2.insert(i);
    custom->addInteractionGroup(set1, set2);
    custom->setForceGroup(1);

    // Try simulating it and see if energy is conserved (indicating that any optimizations
    // for combining the cutoff with the interaction group are behaving consistently).

    VerletIntegrator integrator(0.001);
    Context context(system, integrator, platform);
    context.setPositions(positions);
    context.setVelocitiesToTemperature(100);

    // Make sure the custom force (force group 1) actually contributes energy.
    ASSERT(context.getState(State::Energy, false, 1<<1).getPotentialEnergy() != 0.0);
    State initialState = context.getState(State::Energy);
    double initialEnergy = initialState.getPotentialEnergy()+initialState.getKineticEnergy();
    for (int iteration = 0; iteration < 100; iteration++) {
        integrator.step(10);
        State state = context.getState(State::Energy);
        double energy = state.getPotentialEnergy()+state.getKineticEnergy();
        ASSERT_EQUAL_TOL(initialEnergy, energy, 0.001);
    }
}
void
testMultipleCutoffs
()
{
System
system
;
system
.
addParticle
(
1.0
);
...
...
@@ -1253,6 +1315,7 @@ int main(int argc, char* argv[]) {
testLargeInteractionGroup
();
testInteractionGroupLongRangeCorrection
();
testInteractionGroupTabulatedFunction
();
testInteractionGroupWithCutoff
();
testMultipleCutoffs
();
testMultipleSwitches
();
testIllegalVariable
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment