Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
85532a52
Commit
85532a52
authored
Jul 07, 2015
by
Peter Eastman
Browse files
CUDA allows different force groups to have different cutoffs
parent
63bb38a9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
191 additions
and
123 deletions
+191
-123
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-1
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+33
-28
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+25
-5
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+12
-13
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+116
-73
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+3
-3
No files found.
platforms/cuda/include/CudaKernels.h
View file @
85532a52
...
@@ -752,7 +752,7 @@ public:
...
@@ -752,7 +752,7 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GBSAOBCForce
&
force
);
private:
private:
double
prefactor
,
surfaceAreaFactor
;
double
prefactor
,
surfaceAreaFactor
,
cutoff
;
bool
hasCreatedKernels
;
bool
hasCreatedKernels
;
int
maxTiles
;
int
maxTiles
;
CudaContext
&
cu
;
CudaContext
&
cu
;
...
@@ -802,6 +802,7 @@ public:
...
@@ -802,6 +802,7 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomGBForce
&
force
);
private:
private:
double
cutoff
;
bool
hasInitializedKernels
,
needParameterGradient
;
bool
hasInitializedKernels
,
needParameterGradient
;
int
maxTiles
,
numComputedValues
;
int
maxTiles
,
numComputedValues
;
CudaContext
&
cu
;
CudaContext
&
cu
;
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
85532a52
...
@@ -129,31 +129,17 @@ public:
...
@@ -129,31 +129,17 @@ public:
return
forceThreadBlockSize
;
return
forceThreadBlockSize
;
}
}
/**
/**
* Get the cutoff distance.
* Get the
maximum
cutoff distance
used by any force group
.
*/
*/
double
getCutoffDistance
()
{
double
getMaxCutoffDistance
();
return
cutoff
;
}
/**
* Get whether any interactions have been added.
*/
bool
getHasInteractions
()
{
return
cutoff
!=
-
1.0
;
}
/**
* Get the force group in which nonbonded interactions should be computed.
*/
int
getForceGroup
()
{
return
nonbondedForceGroup
;
}
/**
/**
* Prepare to compute interactions. This updates the neighbor list.
* Prepare to compute interactions. This updates the neighbor list.
*/
*/
void
prepareInteractions
();
void
prepareInteractions
(
int
forceGroups
);
/**
/**
* Compute the nonbonded interactions.
* Compute the nonbonded interactions.
*/
*/
void
computeInteractions
();
void
computeInteractions
(
int
forceGroups
);
/**
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/
*/
...
@@ -246,16 +232,20 @@ public:
...
@@ -246,16 +232,20 @@ public:
* @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel
* @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
*/
*/
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
);
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
* @param groups the set of force groups
*/
void
createKernelsForGroups
(
int
groups
);
private:
private:
class
KernelSet
;
class
BlockSortTrait
;
class
BlockSortTrait
;
CudaContext
&
context
;
CudaContext
&
context
;
CUfunction
forceKernel
;
std
::
map
<
int
,
KernelSet
>
groupKernels
;
CUfunction
findBlockBoundsKernel
;
CUfunction
sortBoxDataKernel
;
CUfunction
findInteractingBlocksKernel
;
CUfunction
findInteractionsWithinBlocksKernel
;
CudaArray
*
exclusionTiles
;
CudaArray
*
exclusionTiles
;
CudaArray
*
exclusions
;
CudaArray
*
exclusions
;
CudaArray
*
exclusionIndices
;
CudaArray
*
exclusionIndices
;
...
@@ -275,11 +265,26 @@ private:
...
@@ -275,11 +265,26 @@ private:
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
std
::
vector
<
int
>
>
atomExclusions
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
parameters
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
vector
<
ParameterInfo
>
arguments
;
std
::
string
kernelSource
;
std
::
map
<
int
,
double
>
groupCutoff
;
std
::
map
<
std
::
str
in
g
,
std
::
string
>
kernelDefines
;
std
::
map
<
in
t
,
std
::
string
>
groupKernelSource
;
double
c
utoff
;
double
lastC
utoff
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
numForceThreadBlocks
,
forceThreadBlockSize
,
nonbondedForceGroup
,
numAtoms
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
maxExclusions
,
numForceThreadBlocks
,
forceThreadBlockSize
,
numAtoms
,
groupFlags
;
};
/**
* This class stores the kernels to execute for a set of force groups.
*/
class
CudaNonbondedUtilities
::
KernelSet
{
public:
bool
hasForces
;
double
cutoffDistance
;
CUfunction
forceKernel
;
CUfunction
findBlockBoundsKernel
;
CUfunction
sortBoxDataKernel
;
CUfunction
findInteractingBlocksKernel
;
CUfunction
findInteractionsWithinBlocksKernel
;
};
};
/**
/**
...
...
platforms/cuda/src/CudaContext.cpp
View file @
85532a52
...
@@ -49,6 +49,7 @@
...
@@ -49,6 +49,7 @@
#include <fstream>
#include <fstream>
#include <iomanip>
#include <iomanip>
#include <iostream>
#include <iostream>
#include <set>
#include <sstream>
#include <sstream>
#include <typeinfo>
#include <typeinfo>
#include <cudaProfiler.h>
#include <cudaProfiler.h>
...
@@ -405,13 +406,32 @@ void CudaContext::setAsCurrent() {
...
@@ -405,13 +406,32 @@ void CudaContext::setAsCurrent() {
}
}
string
CudaContext
::
replaceStrings
(
const
string
&
input
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
replacements
)
const
{
string
CudaContext
::
replaceStrings
(
const
string
&
input
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
replacements
)
const
{
static
set
<
char
>
symbolChars
;
if
(
symbolChars
.
size
()
==
0
)
{
symbolChars
.
insert
(
'_'
);
for
(
char
c
=
'a'
;
c
<=
'z'
;
c
++
)
symbolChars
.
insert
(
c
);
for
(
char
c
=
'A'
;
c
<=
'Z'
;
c
++
)
symbolChars
.
insert
(
c
);
for
(
char
c
=
'0'
;
c
<=
'9'
;
c
++
)
symbolChars
.
insert
(
c
);
}
string
result
=
input
;
string
result
=
input
;
for
(
map
<
string
,
string
>::
const_iterator
iter
=
replacements
.
begin
();
iter
!=
replacements
.
end
();
iter
++
)
{
for
(
map
<
string
,
string
>::
const_iterator
iter
=
replacements
.
begin
();
iter
!=
replacements
.
end
();
iter
++
)
{
int
index
=
-
1
;
int
index
=
0
;
int
size
=
iter
->
first
.
size
();
do
{
do
{
index
=
result
.
find
(
iter
->
first
);
index
=
result
.
find
(
iter
->
first
,
index
);
if
(
index
!=
result
.
npos
)
if
(
index
!=
result
.
npos
)
{
result
.
replace
(
index
,
iter
->
first
.
size
(),
iter
->
second
);
if
((
index
==
0
||
symbolChars
.
find
(
result
[
index
-
1
])
==
symbolChars
.
end
())
&&
(
index
==
result
.
size
()
-
size
||
symbolChars
.
find
(
result
[
index
+
size
])
==
symbolChars
.
end
()))
{
// We have found a complete symbol, not part of a longer symbol.
result
.
replace
(
index
,
size
,
iter
->
second
);
index
+=
iter
->
second
.
size
();
}
else
index
++
;
}
}
while
(
index
!=
result
.
npos
);
}
while
(
index
!=
result
.
npos
);
}
}
return
result
;
return
result
;
...
@@ -1216,7 +1236,7 @@ void CudaContext::reorderAtomsImpl() {
...
@@ -1216,7 +1236,7 @@ void CudaContext::reorderAtomsImpl() {
if
(
useHilbert
)
if
(
useHilbert
)
binWidth
=
(
Real
)
(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
binWidth
=
(
Real
)
(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
else
else
binWidth
=
(
Real
)
(
0.2
*
nonbonded
->
getCutoffDistance
());
binWidth
=
(
Real
)
(
0.2
*
nonbonded
->
get
Max
CutoffDistance
());
Real
invBinWidth
=
(
Real
)
(
1.0
/
binWidth
);
Real
invBinWidth
=
(
Real
)
(
1.0
/
binWidth
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
85532a52
...
@@ -98,16 +98,13 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
...
@@ -98,16 +98,13 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
bool
includeNonbonded
=
((
groups
&
(
1
<<
nb
.
getForceGroup
()))
!=
0
);
cu
.
setComputeForceCount
(
cu
.
getComputeForceCount
()
+
1
);
cu
.
setComputeForceCount
(
cu
.
getComputeForceCount
()
+
1
);
if
(
includeNonbonded
)
nb
.
prepareInteractions
(
groups
);
nb
.
prepareInteractions
();
}
}
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
if
((
groups
&
(
1
<<
cu
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cu
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cu
.
getNonbondedUtilities
().
computeInteractions
();
double
sum
=
0.0
;
double
sum
=
0.0
;
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
...
@@ -2570,8 +2567,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
...
@@ -2570,8 +2567,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
surfaceAreaFactor
=
-
6.0
*
4
*
M_PI
*
force
.
getSurfaceAreaEnergy
();
surfaceAreaFactor
=
-
6.0
*
4
*
M_PI
*
force
.
getSurfaceAreaEnergy
();
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
CutoffNonPeriodic
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
CutoffNonPeriodic
);
cutoff
=
force
.
getCutoffDistance
();
string
source
=
CudaKernelSources
::
gbsaObc2
;
string
source
=
CudaKernelSources
::
gbsaObc2
;
nb
.
addInteraction
(
useCutoff
,
usePeriodic
,
false
,
force
.
getCutoffDistance
()
,
vector
<
vector
<
int
>
>
(),
source
,
force
.
getForceGroup
());
nb
.
addInteraction
(
useCutoff
,
usePeriodic
,
false
,
cutoff
,
vector
<
vector
<
int
>
>
(),
source
,
force
.
getForceGroup
());
nb
.
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float"
,
2
,
sizeof
(
float2
),
params
->
getDevicePointer
()));;
nb
.
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float"
,
2
,
sizeof
(
float2
),
params
->
getDevicePointer
()));;
nb
.
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"long long"
,
1
,
sizeof
(
long
long
),
bornForce
->
getDevicePointer
()));;
nb
.
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"long long"
,
1
,
sizeof
(
long
long
),
bornForce
->
getDevicePointer
()));;
cu
.
addForce
(
new
CudaGBSAOBCForceInfo
(
force
));
cu
.
addForce
(
new
CudaGBSAOBCForceInfo
(
force
));
...
@@ -2591,8 +2589,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -2591,8 +2589,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
cu
.
getComputeCapability
()
>=
3.0
&&
!
cu
.
getUseDoublePrecision
())
if
(
cu
.
getComputeCapability
()
>=
3.0
&&
!
cu
.
getUseDoublePrecision
())
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
()
);
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
nb
.
getCutoffDistance
()
);
defines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
cutoff
);
defines
[
"PREFACTOR"
]
=
cu
.
doubleToString
(
prefactor
);
defines
[
"PREFACTOR"
]
=
cu
.
doubleToString
(
prefactor
);
defines
[
"SURFACE_AREA_FACTOR"
]
=
cu
.
doubleToString
(
surfaceAreaFactor
);
defines
[
"SURFACE_AREA_FACTOR"
]
=
cu
.
doubleToString
(
surfaceAreaFactor
);
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
...
@@ -2764,6 +2762,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -2764,6 +2762,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
if
(
cu
.
getPlatformData
().
contexts
.
size
()
>
1
)
if
(
cu
.
getPlatformData
().
contexts
.
size
()
>
1
)
throw
OpenMMException
(
"CustomGBForce does not support using multiple CUDA devices"
);
throw
OpenMMException
(
"CustomGBForce does not support using multiple CUDA devices"
);
cutoff
=
force
.
getCutoffDistance
();
bool
useExclusionsForValue
=
false
;
bool
useExclusionsForValue
=
false
;
numComputedValues
=
force
.
getNumComputedValues
();
numComputedValues
=
force
.
getNumComputedValues
();
vector
<
string
>
computedValueNames
(
force
.
getNumComputedValues
());
vector
<
string
>
computedValueNames
(
force
.
getNumComputedValues
());
...
@@ -2956,7 +2955,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -2956,7 +2955,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairValueDefines
[
"NEED_PADDING"
]
=
"1"
;
pairValueDefines
[
"NEED_PADDING"
]
=
"1"
;
pairValueDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
pairValueDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
pairValueDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairValueDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairValueDefines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
()
);
pairValueDefines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
pairValueDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
pairValueDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
pairValueDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pairValueDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pairValueDefines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
pairValueDefines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
...
@@ -3123,7 +3122,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3123,7 +3122,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairEnergyDefines
[
"NEED_PADDING"
]
=
"1"
;
pairEnergyDefines
[
"NEED_PADDING"
]
=
"1"
;
pairEnergyDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairEnergyDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairEnergyDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
pairEnergyDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
pairEnergyDefines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
()
);
pairEnergyDefines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
pairEnergyDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
pairEnergyDefines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
pairEnergyDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pairEnergyDefines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
pairEnergyDefines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
pairEnergyDefines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
...
@@ -3367,7 +3366,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3367,7 +3366,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globals
->
upload
(
globalParamValues
);
globals
->
upload
(
globalParamValues
);
arguments
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
arguments
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
}
}
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
force
.
getNumExclusions
()
>
0
,
force
.
getCutoffDistance
()
,
exclusionList
,
source
,
force
.
getForceGroup
());
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
force
.
getNumExclusions
()
>
0
,
cutoff
,
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
cu
.
getNonbondedUtilities
().
addParameter
(
parameters
[
i
]);
cu
.
getNonbondedUtilities
().
addParameter
(
parameters
[
i
]);
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
...
@@ -3393,7 +3392,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3393,7 +3392,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
pairValueDefines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
pairValueDefines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
pairValueDefines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
pairValueDefines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
pairValueDefines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
nb
.
getCutoffDistance
()
);
pairValueDefines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
cutoff
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
pairValueSrc
,
pairValueDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
pairValueSrc
,
pairValueDefines
);
pairValueKernel
=
cu
.
getKernel
(
module
,
"computeN2Value"
);
pairValueKernel
=
cu
.
getKernel
(
module
,
"computeN2Value"
);
pairValueSrc
=
""
;
pairValueSrc
=
""
;
...
@@ -3407,7 +3406,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3407,7 +3406,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
pairEnergyDefines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
pairEnergyDefines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
pairEnergyDefines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
pairEnergyDefines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
pairEnergyDefines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
nb
.
getCutoffDistance
()
);
pairEnergyDefines
[
"CUTOFF"
]
=
cu
.
doubleToString
(
cutoff
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
pairEnergySrc
,
pairEnergyDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
pairEnergySrc
,
pairEnergyDefines
);
pairEnergyKernel
=
cu
.
getKernel
(
module
,
"computeN2Energy"
);
pairEnergyKernel
=
cu
.
getKernel
(
module
,
"computeN2Energy"
);
pairEnergySrc
=
""
;
pairEnergySrc
=
""
;
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
85532a52
...
@@ -62,10 +62,10 @@ private:
...
@@ -62,10 +62,10 @@ private:
bool
useDouble
;
bool
useDouble
;
};
};
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
),
forceRebuildNeighborList
(
true
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
)
,
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks to use.
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
@@ -109,24 +109,28 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
...
@@ -109,24 +109,28 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
}
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
if
(
cutoff
!=
-
1.
0
)
{
if
(
groupCutoff
.
size
()
>
0
)
{
if
(
usesCutoff
!=
useCutoff
)
if
(
usesCutoff
!=
useCutoff
)
throw
OpenMMException
(
"All Forces must agree on whether to use a cutoff"
);
throw
OpenMMException
(
"All Forces must agree on whether to use a cutoff"
);
if
(
usesPeriodic
!=
usePeriodic
)
if
(
usesPeriodic
!=
usePeriodic
)
throw
OpenMMException
(
"All Forces must agree on whether to use periodic boundary conditions"
);
throw
OpenMMException
(
"All Forces must agree on whether to use periodic boundary conditions"
);
if
(
cutoffDistance
!=
cutoff
)
if
(
usesCutoff
&&
groupCutoff
.
find
(
forceGroup
)
!=
groupCutoff
.
end
()
&&
groupCutoff
[
forceGroup
]
!=
cutoffDistance
)
throw
OpenMMException
(
"All Forces must use the same cutoff distance"
);
throw
OpenMMException
(
"All Forces in a single force group must use the same cutoff distance"
);
if
(
forceGroup
!=
nonbondedForceGroup
)
throw
OpenMMException
(
"All nonbonded forces must be in the same force group"
);
}
}
if
(
usesExclusions
)
if
(
usesExclusions
)
requestExclusions
(
exclusionList
);
requestExclusions
(
exclusionList
);
useCutoff
=
usesCutoff
;
useCutoff
=
usesCutoff
;
usePeriodic
=
usesPeriodic
;
usePeriodic
=
usesPeriodic
;
cutoff
=
cutoffDistance
;
groupCutoff
[
forceGroup
]
=
cutoffDistance
;
if
(
kernel
.
size
()
>
0
)
groupFlags
|=
1
<<
forceGroup
;
kernelSource
+=
kernel
+
"
\n
"
;
if
(
kernel
.
size
()
>
0
)
{
nonbondedForceGroup
=
forceGroup
;
if
(
groupKernelSource
.
find
(
forceGroup
)
==
groupKernelSource
.
end
())
groupKernelSource
[
forceGroup
]
=
""
;
map
<
string
,
string
>
replacements
;
replacements
[
"CUTOFF"
]
=
"CUTOFF_"
+
context
.
intToString
(
forceGroup
);
replacements
[
"CUTOFF_SQUARED"
]
=
"CUTOFF_"
+
context
.
intToString
(
forceGroup
)
+
"_SQUARED"
;
groupKernelSource
[
forceGroup
]
+=
context
.
replaceStrings
(
kernel
,
replacements
)
+
"
\n
"
;
}
}
}
void
CudaNonbondedUtilities
::
addParameter
(
const
ParameterInfo
&
parameter
)
{
void
CudaNonbondedUtilities
::
addParameter
(
const
ParameterInfo
&
parameter
)
{
...
@@ -213,6 +217,9 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -213,6 +217,9 @@ void CudaNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec
.
insert
(
exclusionIndicesVec
.
end
(),
exclusionBlocksForBlock
[
i
].
begin
(),
exclusionBlocksForBlock
[
i
].
end
());
exclusionIndicesVec
.
insert
(
exclusionIndicesVec
.
end
(),
exclusionBlocksForBlock
[
i
].
begin
(),
exclusionBlocksForBlock
[
i
].
end
());
exclusionRowIndicesVec
[
i
+
1
]
=
exclusionIndicesVec
.
size
();
exclusionRowIndicesVec
[
i
+
1
]
=
exclusionIndicesVec
.
size
();
}
}
maxExclusions
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
exclusionIndices
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionIndices
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
exclusionIndicesVec
.
size
(),
"exclusionIndices"
);
exclusionRowIndices
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionRowIndices
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
exclusionRowIndicesVec
.
size
(),
"exclusionRowIndices"
);
exclusionIndices
->
upload
(
exclusionIndicesVec
);
exclusionIndices
->
upload
(
exclusionIndicesVec
);
...
@@ -270,29 +277,33 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -270,29 +277,33 @@ void CudaNonbondedUtilities::initialize(const System& system) {
interactionCount
->
upload
(
count
);
interactionCount
->
upload
(
count
);
}
}
//
Create
kernels.
//
Record arguments for
kernels.
if
(
kernelSource
.
size
()
>
0
)
forceArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
arguments
,
true
,
true
);
forceArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
startTileIndex
);
forceArgs
.
push_back
(
&
numTiles
);
if
(
useCutoff
)
{
forceArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
forceArgs
.
push_back
(
&
maxTiles
);
forceArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
parameters
.
size
();
i
++
)
forceArgs
.
push_back
(
&
parameters
[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
forceArgs
.
push_back
(
&
arguments
[
i
].
getMemory
());
if
(
useCutoff
)
{
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
map
<
string
,
string
>
defines
;
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
->
getSize
());
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
int
maxExclusions
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
exclusionBlocksForBlock
.
size
();
i
++
)
maxExclusions
=
(
maxExclusions
>
exclusionBlocksForBlock
[
i
].
size
()
?
maxExclusions
:
exclusionBlocksForBlock
[
i
].
size
());
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
CUmodule
interactingBlocksProgram
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
findInteractingBlocks
,
defines
);
findBlockBoundsKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsArgs
.
push_back
(
&
numAtoms
);
findBlockBoundsArgs
.
push_back
(
&
numAtoms
);
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findBlockBoundsArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
...
@@ -304,7 +315,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -304,7 +315,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
findBlockBoundsArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
sortedBlocks
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
...
@@ -315,7 +325,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -315,7 +325,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
...
@@ -338,30 +347,48 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -338,30 +347,48 @@ void CudaNonbondedUtilities::initialize(const System& system) {
}
}
}
}
void
CudaNonbondedUtilities
::
prepareInteractions
()
{
double
CudaNonbondedUtilities
::
getMaxCutoffDistance
()
{
double
cutoff
=
0.0
;
for
(
map
<
int
,
double
>::
const_iterator
iter
=
groupCutoff
.
begin
();
iter
!=
groupCutoff
.
end
();
++
iter
)
cutoff
=
max
(
cutoff
,
iter
->
second
);
return
cutoff
;
}
void
CudaNonbondedUtilities
::
prepareInteractions
(
int
forceGroups
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
if
(
groupKernels
.
find
(
forceGroups
)
==
groupKernels
.
end
())
createKernelsForGroups
(
forceGroups
);
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
;
return
;
if
(
numTiles
==
0
)
if
(
numTiles
==
0
)
return
;
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
double4
box
=
context
.
getPeriodicBoxSize
();
double4
box
=
context
.
getPeriodicBoxSize
();
double
minAllowedSize
=
1.999999
*
cutoff
;
double
minAllowedSize
=
1.999999
*
kernels
.
cutoffDistance
;
if
(
box
.
x
<
minAllowedSize
||
box
.
y
<
minAllowedSize
||
box
.
z
<
minAllowedSize
)
if
(
box
.
x
<
minAllowedSize
||
box
.
y
<
minAllowedSize
||
box
.
z
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
}
}
// Compute the neighbor list.
// Compute the neighbor list.
context
.
executeKernel
(
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
forceRebuildNeighborList
=
false
;
lastCutoff
=
kernels
.
cutoffDistance
;
}
}
void
CudaNonbondedUtilities
::
computeInteractions
()
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
if
(
kernelSource
.
size
()
>
0
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
context
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
context
.
executeKernel
(
kernels
.
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
}
...
@@ -411,7 +438,43 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
...
@@ -411,7 +438,43 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
}
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
void
CudaNonbondedUtilities
::
createKernelsForGroups
(
int
groups
)
{
KernelSet
kernels
;
double
cutoff
=
0.0
;
string
source
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
if
((
groups
&
(
1
<<
i
))
!=
0
)
{
cutoff
=
max
(
cutoff
,
groupCutoff
[
i
]);
source
+=
groupKernelSource
[
i
];
}
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
);
if
(
useCutoff
&&
kernels
.
hasForces
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
map
<
string
,
string
>
defines
;
defines
[
"TILE_SIZE"
]
=
context
.
intToString
(
CudaContext
::
TileSize
);
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDING"
]
=
context
.
doubleToString
(
padding
);
defines
[
"PADDED_CUTOFF"
]
=
context
.
doubleToString
(
paddedCutoff
);
defines
[
"PADDED_CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
paddedCutoff
*
paddedCutoff
);
defines
[
"NUM_TILES_WITH_EXCLUSIONS"
]
=
context
.
intToString
(
exclusionTiles
->
getSize
());
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"MAX_EXCLUSIONS"
]
=
context
.
intToString
(
maxExclusions
);
CUmodule
interactingBlocksProgram
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
findInteractingBlocks
,
defines
);
kernels
.
findBlockBoundsKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
kernels
.
sortBoxDataKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"sortBoxData"
);
kernels
.
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
}
groupKernels
[
groups
]
=
kernels
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
@@ -588,8 +651,16 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -588,8 +651,16 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
if
(
useShuffle
)
if
(
useShuffle
)
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
double
maxCutoff
=
0.0
;
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
if
((
groups
&
(
1
<<
i
))
!=
0
)
{
double
cutoff
=
groupCutoff
[
i
];
maxCutoff
=
max
(
maxCutoff
,
cutoff
);
defines
[
"CUTOFF_"
+
context
.
intToString
(
i
)
+
"_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_"
+
context
.
intToString
(
i
)]
=
context
.
doubleToString
(
cutoff
);
}
}
defines
[
"MAX_CUTOFF"
]
=
context
.
doubleToString
(
maxCutoff
);
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
context
.
intToString
(
context
.
getNumAtomBlocks
());
...
@@ -605,33 +676,5 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
...
@@ -605,33 +676,5 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUmodule
program
=
context
.
createModule
(
CudaKernelSources
::
vectorOps
+
context
.
replaceStrings
(
CudaKernelSources
::
nonbonded
,
replacements
),
defines
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
CUfunction
kernel
=
context
.
getKernel
(
program
,
"computeNonbonded"
);
// Set arguments to the Kernel.
int
index
=
0
;
forceArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
forceArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusions
->
getDevicePointer
());
forceArgs
.
push_back
(
&
exclusionTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
startTileIndex
);
forceArgs
.
push_back
(
&
numTiles
);
if
(
useCutoff
)
{
forceArgs
.
push_back
(
&
interactingTiles
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecXPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecYPointer
());
forceArgs
.
push_back
(
context
.
getPeriodicBoxVecZPointer
());
forceArgs
.
push_back
(
&
maxTiles
);
forceArgs
.
push_back
(
&
blockCenter
->
getDevicePointer
());
forceArgs
.
push_back
(
&
blockBoundingBox
->
getDevicePointer
());
forceArgs
.
push_back
(
&
interactingAtoms
->
getDevicePointer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
forceArgs
.
push_back
(
&
params
[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
forceArgs
.
push_back
(
&
arguments
[
i
].
getMemory
());
return
kernel
;
return
kernel
;
}
}
platforms/cuda/src/kernels/nonbonded.cu
View file @
85532a52
...
@@ -330,9 +330,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -330,9 +330,9 @@ extern "C" __global__ void computeNonbonded(
if
(
numTiles
<=
maxTiles
)
{
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_
CUTOFF
);
}
}
else
else
#endif
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment