Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7639d0bd
Commit
7639d0bd
authored
Mar 12, 2018
by
Peter Eastman
Browse files
CustomNonbondedForce with interaction groups uses neighbor list in CUDA
parent
fb0cc5e7
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
152 additions
and
18 deletions
+152
-18
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+4
-4
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+12
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+50
-4
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+8
-4
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+74
-2
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+3
-3
No files found.
platforms/cuda/include/CudaKernels.h
View file @
7639d0bd
...
@@ -763,15 +763,15 @@ private:
...
@@ -763,15 +763,15 @@ private:
ForceInfo
*
info
;
ForceInfo
*
info
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
globals
;
CudaArray
globals
;
CudaArray
interactionGroupData
;
CudaArray
interactionGroupData
,
filteredGroupData
,
numGroupTiles
;
CUfunction
interactionGroupKernel
;
CUfunction
interactionGroupKernel
,
prepareNeighborListKernel
,
buildNeighborListKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
void
*>
interactionGroupArgs
,
prepareNeighborListArgs
,
buildNeighborListArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
std
::
vector
<
CudaArray
>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
std
::
vector
<
double
>
longRangeCoefficientDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
,
hasParamDerivs
,
useNeighborList
;
int
numGroupThreadBlocks
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
const
System
&
system
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
7639d0bd
...
@@ -142,6 +142,11 @@ public:
...
@@ -142,6 +142,11 @@ public:
* Get the maximum cutoff distance used by any force group.
* Get the maximum cutoff distance used by any force group.
*/
*/
double
getMaxCutoffDistance
();
double
getMaxCutoffDistance
();
/**
* Given a nonbonded cutoff, get the padded cutoff distance used in computing
* the neighbor list.
*/
double
padCutoff
(
double
cutoff
);
/**
/**
* Prepare to compute interactions. This updates the neighbor list.
* Prepare to compute interactions. This updates the neighbor list.
*/
*/
...
@@ -220,6 +225,13 @@ public:
...
@@ -220,6 +225,13 @@ public:
CudaArray
&
getExclusionRowIndices
()
{
CudaArray
&
getExclusionRowIndices
()
{
return
exclusionRowIndices
;
return
exclusionRowIndices
;
}
}
/**
* Get the array containing a flag for whether the neighbor list was rebuilt
* on the most recent call to prepareInteractions().
*/
CudaArray
&
getRebuildNeighborList
()
{
return
rebuildNeighborList
;
}
/**
/**
* Get the index of the first tile this context is responsible for processing.
* Get the index of the first tile this context is responsible for processing.
*/
*/
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
7639d0bd
...
@@ -2584,6 +2584,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2584,6 +2584,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
}
}
interactionGroupData
.
initialize
<
int4
>
(
cu
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
.
initialize
<
int4
>
(
cu
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
.
upload
(
groupData
);
interactionGroupData
.
upload
(
groupData
);
numGroupTiles
.
initialize
<
int
>
(
cu
,
1
,
"numGroupTiles"
);
// Allocate space for a neighbor list, if necessary.
if
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
&&
groupData
.
size
()
>
cu
.
getNumThreadBlocks
())
{
filteredGroupData
.
initialize
<
int4
>
(
cu
,
groupData
.
size
(),
"filteredGroupData"
);
interactionGroupData
.
copyTo
(
filteredGroupData
);
int
numTiles
=
groupData
.
size
()
/
32
;
numGroupTiles
.
upload
(
&
numTiles
);
}
// Create the kernel.
// Create the kernel.
...
@@ -2662,11 +2672,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2662,11 +2672,16 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"LOCAL_MEMORY_SIZE"
]
=
cu
.
intToString
(
max
(
32
,
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()));
int
localMemorySize
=
max
(
32
,
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
defines
[
"LOCAL_MEMORY_SIZE"
]
=
cu
.
intToString
(
localMemorySize
);
defines
[
"WARPS_IN_BLOCK"
]
=
cu
.
intToString
(
localMemorySize
/
32
);
double
cutoff
=
force
.
getCutoffDistance
();
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
double
paddedCutoff
=
cu
.
getNonbondedUtilities
().
padCutoff
(
cutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"TILE_SIZE"
]
=
"32"
;
defines
[
"TILE_SIZE"
]
=
"32"
;
defines
[
"NUM_TILES"
]
=
cu
.
intToString
(
numTileSets
);
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cu
.
getContextIndex
()
*
numTileSets
/
numContexts
;
int
startIndex
=
cu
.
getContextIndex
()
*
numTileSets
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numTileSets
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numTileSets
/
numContexts
;
...
@@ -2674,12 +2689,19 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2674,12 +2689,19 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines
[
"LAST_TILE"
]
=
cu
.
intToString
(
endIndex
);
defines
[
"LAST_TILE"
]
=
cu
.
intToString
(
endIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
cu
.
getUseDoublePrecision
())
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
cu
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
CUmodule
program
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbondedGroups
,
replacements
),
defines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbondedGroups
,
replacements
),
defines
);
interactionGroupKernel
=
cu
.
getKernel
(
program
,
"computeInteractionGroups"
);
interactionGroupKernel
=
cu
.
getKernel
(
module
,
"computeInteractionGroups"
);
prepareNeighborListKernel
=
cu
.
getKernel
(
module
,
"prepareToBuildNeighborList"
);
buildNeighborListKernel
=
cu
.
getKernel
(
module
,
"buildNeighborList"
);
numGroupThreadBlocks
=
cu
.
getNonbondedUtilities
().
getNumForceThreadBlocks
();
numGroupThreadBlocks
=
cu
.
getNonbondedUtilities
().
getNumForceThreadBlocks
();
}
}
double
CudaCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
CudaCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
useNeighborList
=
(
filteredGroupData
.
isInitialized
()
&&
cu
.
getNonbondedUtilities
().
getUseCutoff
());
if
(
useNeighborList
&&
cu
.
getContextIndex
()
>
0
)
{
// When using a neighbor list, run the whole calculation on a single device.
return
0.0
;
}
if
(
globals
.
isInitialized
())
{
if
(
globals
.
isInitialized
())
{
bool
changed
=
false
;
bool
changed
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
...
@@ -2706,7 +2728,9 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2706,7 +2728,9 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
interactionGroupData
.
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
(
useNeighborList
?
filteredGroupData
:
interactionGroupData
).
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
useNeighborList
);
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
...
@@ -2720,8 +2744,30 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2720,8 +2744,30 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs
.
push_back
(
&
globals
.
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
globals
.
getDevicePointer
());
if
(
hasParamDerivs
)
if
(
hasParamDerivs
)
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyParamDerivBuffer
().
getDevicePointer
());
if
(
useNeighborList
)
{
// Initialize kernels for building the interaction group neighbor list.
prepareNeighborListArgs
.
push_back
(
&
cu
.
getNonbondedUtilities
().
getRebuildNeighborList
().
getDevicePointer
());
prepareNeighborListArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
cu
.
getNonbondedUtilities
().
getRebuildNeighborList
().
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
numGroupTiles
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
interactionGroupData
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
&
filteredGroupData
.
getDevicePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecYPointer
());
buildNeighborListArgs
.
push_back
(
cu
.
getPeriodicBoxVecZPointer
());
}
}
}
int
forceThreadBlockSize
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
forceThreadBlockSize
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
if
(
useNeighborList
)
{
// Rebuild the neighbor list, if necessary.
cu
.
executeKernel
(
prepareNeighborListKernel
,
&
prepareNeighborListArgs
[
0
],
1
,
1
);
cu
.
executeKernel
(
buildNeighborListKernel
,
&
buildNeighborListArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
cu
.
executeKernel
(
interactionGroupKernel
,
&
interactionGroupArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
cu
.
executeKernel
(
interactionGroupKernel
,
&
interactionGroupArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
7639d0bd
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
8
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -351,6 +351,11 @@ double CudaNonbondedUtilities::getMaxCutoffDistance() {
...
@@ -351,6 +351,11 @@ double CudaNonbondedUtilities::getMaxCutoffDistance() {
return
cutoff
;
return
cutoff
;
}
}
double
CudaNonbondedUtilities
::
padCutoff
(
double
cutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
return
cutoff
+
padding
;
}
void
CudaNonbondedUtilities
::
prepareInteractions
(
int
forceGroups
)
{
void
CudaNonbondedUtilities
::
prepareInteractions
(
int
forceGroups
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
return
;
...
@@ -462,13 +467,12 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -462,13 +467,12 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
source
=
source
;
kernels
.
source
=
source
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
padCutoff
(
cutoff
);
double
paddedCutoff
=
cutoff
+
padding
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
ing
);
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padd
edCutoff
-
cutoff
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
.
getSize
());
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
View file @
7639d0bd
...
@@ -10,6 +10,7 @@ typedef struct {
...
@@ -10,6 +10,7 @@ typedef struct {
extern
"C"
__global__
void
computeInteractionGroups
(
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
int
*
__restrict__
numGroupTiles
,
bool
useNeighborList
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
@@ -20,8 +21,8 @@ extern "C" __global__ void computeInteractionGroups(
...
@@ -20,8 +21,8 @@ extern "C" __global__ void computeInteractionGroups(
INIT_DERIVATIVES
INIT_DERIVATIVES
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
startTile
=
(
useNeighborList
?
warp
*
numGroupTiles
[
0
]
/
totalWarps
:
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
)
;
const
unsigned
int
endTile
=
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
endTile
=
(
useNeighborList
?
(
warp
+
1
)
*
numGroupTiles
[
0
]
/
totalWarps
:
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
)
;
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int
atom1
=
atomData
.
x
;
const
int
atom1
=
atomData
.
x
;
...
@@ -86,3 +87,74 @@ extern "C" __global__ void computeInteractionGroups(
...
@@ -86,3 +87,74 @@ extern "C" __global__ void computeInteractionGroups(
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
SAVE_DERIVATIVES
SAVE_DERIVATIVES
}
}
/**
* If the neighbor list needs to be rebuilt, reset the number of tiles to 0. This is
* executed by a single thread.
*/
extern
"C"
__global__
void
prepareToBuildNeighborList
(
int
*
__restrict__
rebuildNeighborList
,
int
*
__restrict__
numGroupTiles
)
{
if
(
rebuildNeighborList
[
0
]
==
1
)
numGroupTiles
[
0
]
=
0
;
}
/**
* Filter the list of tiles to include only ones that have interactions within the
* padded cutoff.
*/
extern
"C"
__global__
void
buildNeighborList
(
int
*
__restrict__
rebuildNeighborList
,
int
*
__restrict__
numGroupTiles
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
int4
*
__restrict__
filteredGroupData
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
// If the neighbor list doesn't need to be rebuilt on this step, return immediately.
if
(
rebuildNeighborList
[
0
]
==
0
)
return
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
local_warp
=
threadIdx
.
x
/
TILE_SIZE
;
// local warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
__shared__
real4
localPos
[
LOCAL_MEMORY_SIZE
];
__shared__
volatile
bool
anyInteraction
[
WARPS_IN_BLOCK
];
__shared__
volatile
int
tileIndex
[
WARPS_IN_BLOCK
];
const
unsigned
int
startTile
=
warp
*
NUM_TILES
/
totalWarps
;
const
unsigned
int
endTile
=
(
warp
+
1
)
*
NUM_TILES
/
totalWarps
;
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int
atom1
=
atomData
.
x
;
const
int
atom2
=
atomData
.
y
;
const
int
rangeStart
=
atomData
.
z
&
0xFFFF
;
const
int
rangeEnd
=
(
atomData
.
z
>>
16
)
&
0xFFFF
;
const
int
exclusions
=
atomData
.
w
;
real4
posq1
=
posq
[
atom1
];
localPos
[
threadIdx
.
x
]
=
posq
[
atom2
];
if
(
tgx
==
0
)
anyInteraction
[
local_warp
]
=
false
;
int
tj
=
tgx
;
SYNC_WARPS
;
for
(
int
j
=
rangeStart
;
j
<
rangeEnd
&&
!
anyInteraction
[
local_warp
];
j
++
)
{
if
(
tj
<
rangeEnd
)
{
bool
isExcluded
=
(((
exclusions
>>
tj
)
&
1
)
==
0
);
int
localIndex
=
tbx
+
tj
;
real3
delta
=
make_real3
(
localPos
[
localIndex
].
x
-
posq1
.
x
,
localPos
[
localIndex
].
y
-
posq1
.
y
,
localPos
[
localIndex
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
!
isExcluded
&&
r2
<
PADDED_CUTOFF_SQUARED
)
anyInteraction
[
local_warp
]
=
true
;
}
tj
=
(
tj
==
rangeEnd
-
1
?
rangeStart
:
tj
+
1
);
SYNC_WARPS
;
}
if
(
anyInteraction
[
local_warp
])
{
if
(
tgx
==
0
)
tileIndex
[
local_warp
]
=
atomicAdd
(
numGroupTiles
,
1
);
SYNC_WARPS
;
filteredGroupData
[
TILE_SIZE
*
tileIndex
[
local_warp
]
+
tgx
]
=
atomData
;
}
}
}
platforms/opencl/src/OpenCLKernels.cpp
View file @
7639d0bd
...
@@ -2861,7 +2861,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2861,7 +2861,7 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, (useNeighborList ? filteredGroupData : interactionGroupData).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, (useNeighborList ? filteredGroupData : interactionGroupData).getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, numGroupTiles.getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, numGroupTiles.getDeviceBuffer());
interactionGroupKernel
.
setArg
<
cl_
bool
>
(
index
++
,
useNeighborList
);
interactionGroupKernel.setArg<cl_
int
>(index++, useNeighborList);
index += 5;
index += 5;
for (auto& buffer : params->getBuffers())
for (auto& buffer : params->getBuffers())
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
interactionGroupKernel.setArg<cl::Memory>(index++, buffer.getMemory());
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
7639d0bd
...
@@ -43,7 +43,7 @@ __kernel void computeInteractionGroups(
...
@@ -43,7 +43,7 @@ __kernel void computeInteractionGroups(
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
int*
restrict
numGroupTiles,
bool
useNeighborList,
__global
int*
restrict
numGroupTiles,
int
useNeighborList,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
...
@@ -159,8 +159,8 @@ __kernel void buildNeighborList(__global int* restrict rebuildNeighborList, __gl
...
@@ -159,8 +159,8 @@ __kernel void buildNeighborList(__global int* restrict rebuildNeighborList, __gl
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
__local
real4
localPos[LOCAL_MEMORY_SIZE]
;
__local
real4
localPos[LOCAL_MEMORY_SIZE]
;
__local
bool
anyInteraction[WARPS_IN_BLOCK]
;
__local
volatile
bool
anyInteraction[WARPS_IN_BLOCK]
;
__local
int
tileIndex[WARPS_IN_BLOCK]
;
__local
volatile
int
tileIndex[WARPS_IN_BLOCK]
;
const
unsigned
int
startTile
=
warp*NUM_TILES/totalWarps
;
const
unsigned
int
startTile
=
warp*NUM_TILES/totalWarps
;
const
unsigned
int
endTile
=
(
warp+1
)
*NUM_TILES/totalWarps
;
const
unsigned
int
endTile
=
(
warp+1
)
*NUM_TILES/totalWarps
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment