Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a47300e6
Commit
a47300e6
authored
Sep 21, 2015
by
Peter Eastman
Browse files
Changes to generating nonbonded kernels
parent
a2d5b985
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
118 additions
and
37 deletions
+118
-37
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+10
-3
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+12
-5
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+21
-5
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+10
-3
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+46
-16
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+16
-2
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+1
-1
No files found.
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
a47300e6
...
...
@@ -138,8 +138,12 @@ public:
void
prepareInteractions
(
int
forceGroups
);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void
computeInteractions
(
int
forceGroups
);
void
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
...
...
@@ -235,8 +239,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
);
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
...
...
@@ -282,7 +288,8 @@ class CudaNonbondedUtilities::KernelSet {
public:
bool
hasForces
;
double
cutoffDistance
;
CUfunction
forceKernel
;
std
::
string
source
;
CUfunction
forceKernel
,
energyKernel
,
forceEnergyKernel
;
CUfunction
findBlockBoundsKernel
;
CUfunction
sortBoxDataKernel
;
CUfunction
findInteractingBlocksKernel
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
a47300e6
...
...
@@ -105,7 +105,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
cu
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cu
.
getNonbondedUtilities
().
computeInteractions
(
groups
,
includeForces
,
includeEnergy
);
double
sum
=
0.0
;
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
a47300e6
...
...
@@ -388,12 +388,15 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
context
.
executeKernel
(
kernels
.
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
CUfunction
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
kernel
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
...
...
@@ -454,8 +457,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
)
;
kernels
.
source
=
source
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -478,7 +481,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -654,6 +657,10 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useShuffle
)
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
a47300e6
...
...
@@ -112,7 +112,7 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
...
...
@@ -175,6 +175,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force
.
x
-=
delta
.
x
*
dEdR
;
force
.
y
-=
delta
.
y
*
dEdR
;
...
...
@@ -184,6 +185,7 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
...
...
@@ -241,6 +243,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -270,11 +273,12 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
...
...
@@ -282,6 +286,7 @@ extern "C" __global__ void computeNonbonded(
}
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
...
...
@@ -290,13 +295,16 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -441,6 +449,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -472,6 +481,7 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -509,6 +519,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -540,12 +551,14 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
...
...
@@ -565,8 +578,11 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
}
#endif
}
pos
++
;
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
(
real
)
energy
;
#endif
}
\ No newline at end of file
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
a47300e6
...
...
@@ -150,8 +150,12 @@ public:
void
prepareInteractions
(
int
forceGroups
);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void
computeInteractions
(
int
forceGroups
);
void
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
...
...
@@ -247,8 +251,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
cl
::
Kernel
createInteractionKernel
(
const
std
::
string
&
source
,
const
std
::
vector
<
ParameterInfo
>&
params
,
const
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
);
cl
::
Kernel
createInteractionKernel
(
const
std
::
string
&
source
,
const
std
::
vector
<
ParameterInfo
>&
params
,
const
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
...
...
@@ -294,7 +300,8 @@ class OpenCLNonbondedUtilities::KernelSet {
public:
bool
hasForces
;
double
cutoffDistance
;
cl
::
Kernel
forceKernel
;
std
::
string
source
;
cl
::
Kernel
forceKernel
,
energyKernel
,
forceEnergyKernel
;
cl
::
Kernel
findBlockBoundsKernel
;
cl
::
Kernel
sortBoxDataKernel
;
cl
::
Kernel
findInteractingBlocksKernel
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
a47300e6
...
...
@@ -128,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups
, includeForces, includeEnergy
);
double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a47300e6
...
...
@@ -373,14 +373,17 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
cl
::
Kernel
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernel
)
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
kernel
s
.
forceKernel
,
9
);
context
.
executeKernel
(
kernel
s
.
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
...
...
@@ -406,12 +409,25 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
return
true
;
...
...
@@ -432,10 +448,21 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
forceRebuildNeighborList
=
true
;
}
...
...
@@ -453,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
);
kernels
.
source
=
source
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -524,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -623,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
a47300e6
...
...
@@ -34,7 +34,7 @@ __kernel void computeNonbonded(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
#else
force.xyz -= dEdR1.xyz;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
...
...
@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
...
...
@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
...
...
@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real
tempEnergy
=
0
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_SYMMETRIC
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
...
...
@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
ifdef
PRUNE_BY_CUTOFF
}
#
endif
...
...
@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
//
Write
results.
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_CUTOFF
unsigned
int
atom2
=
atomIndices[get_local_id
(
0
)
]
;
#
else
...
...
@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz
+=
force.xyz
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
forceBuffers[offset2]
+=
(
real4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
#
endif
#
endif
}
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
#
ifdef
INCLUDE_ENERGY
energyBuffer[get_global_id
(
0
)
]
+=
(
real
)
energy
;
#
endif
}
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
a47300e6
...
...
@@ -2407,7 +2407,7 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
prepareKernel
,
prepareArgs
,
cu
.
getPaddedNumAtoms
());
nonbonded
->
prepareInteractions
(
1
);
nonbonded
->
computeInteractions
(
1
);
nonbonded
->
computeInteractions
(
1
,
includeForces
,
includeEnergy
);
void
*
spreadArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
tempForces
->
getDevicePointer
(),
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
spreadKernel
,
spreadArgs
,
cu
.
getPaddedNumAtoms
());
tempPosq
->
copyTo
(
cu
.
getPosq
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment