Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b11af401
Commit
b11af401
authored
Sep 23, 2015
by
peastman
Browse files
Merge pull request #1140 from peastman/energy
Changes to generating nonbonded kernels
parents
a2d5b985
a47300e6
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
118 additions
and
37 deletions
+118
-37
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+10
-3
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+12
-5
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+21
-5
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+10
-3
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-1
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+46
-16
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+16
-2
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+1
-1
No files found.
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
b11af401
...
...
@@ -138,8 +138,12 @@ public:
void
prepareInteractions
(
int
forceGroups
);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void
computeInteractions
(
int
forceGroups
);
void
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
...
...
@@ -235,8 +239,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
);
CUfunction
createInteractionKernel
(
const
std
::
string
&
source
,
std
::
vector
<
ParameterInfo
>&
params
,
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
...
...
@@ -282,7 +288,8 @@ class CudaNonbondedUtilities::KernelSet {
public:
bool
hasForces
;
double
cutoffDistance
;
CUfunction
forceKernel
;
std
::
string
source
;
CUfunction
forceKernel
,
energyKernel
,
forceEnergyKernel
;
CUfunction
findBlockBoundsKernel
;
CUfunction
sortBoxDataKernel
;
CUfunction
findInteractingBlocksKernel
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
b11af401
...
...
@@ -105,7 +105,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
cu
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cu
.
getNonbondedUtilities
().
computeInteractions
(
groups
,
includeForces
,
includeEnergy
);
double
sum
=
0.0
;
for
(
vector
<
CudaContext
::
ForcePostComputation
*>::
iterator
iter
=
cu
.
getPostComputations
().
begin
();
iter
!=
cu
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
b11af401
...
...
@@ -388,12 +388,15 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
context
.
executeKernel
(
kernels
.
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
CUfunction
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
kernel
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
...
...
@@ -454,8 +457,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
)
;
kernels
.
source
=
source
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -478,7 +481,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -654,6 +657,10 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useShuffle
)
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
b11af401
...
...
@@ -112,7 +112,7 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
...
...
@@ -175,6 +175,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force
.
x
-=
delta
.
x
*
dEdR
;
force
.
y
-=
delta
.
y
*
dEdR
;
...
...
@@ -184,6 +185,7 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
...
...
@@ -241,6 +243,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -270,11 +273,12 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
...
...
@@ -282,6 +286,7 @@ extern "C" __global__ void computeNonbonded(
}
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
...
...
@@ -290,13 +295,16 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -441,6 +449,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -472,6 +481,7 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -509,6 +519,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -540,12 +551,14 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
...
...
@@ -565,8 +578,11 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
}
#endif
}
pos
++
;
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
(
real
)
energy
;
#endif
}
\ No newline at end of file
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
b11af401
...
...
@@ -150,8 +150,12 @@ public:
void
prepareInteractions
(
int
forceGroups
);
/**
* Compute the nonbonded interactions.
*
* @param forceGroups the flags specifying which force groups to include
* @param includeForces whether to compute forces
* @param includeEnergy whether to compute the potential energy
*/
void
computeInteractions
(
int
forceGroups
);
void
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*
...
...
@@ -247,8 +251,10 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
* @param includeForces whether this kernel should compute forces
* @param includeEnergy whether this kernel should compute potential energy
*/
cl
::
Kernel
createInteractionKernel
(
const
std
::
string
&
source
,
const
std
::
vector
<
ParameterInfo
>&
params
,
const
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
);
cl
::
Kernel
createInteractionKernel
(
const
std
::
string
&
source
,
const
std
::
vector
<
ParameterInfo
>&
params
,
const
std
::
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
...
...
@@ -294,7 +300,8 @@ class OpenCLNonbondedUtilities::KernelSet {
public:
bool
hasForces
;
double
cutoffDistance
;
cl
::
Kernel
forceKernel
;
std
::
string
source
;
cl
::
Kernel
forceKernel
,
energyKernel
,
forceEnergyKernel
;
cl
::
Kernel
findBlockBoundsKernel
;
cl
::
Kernel
sortBoxDataKernel
;
cl
::
Kernel
findInteractingBlocksKernel
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
b11af401
...
...
@@ -128,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double
OpenCLCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cl
.
getBondedUtilities
().
computeInteractions
(
groups
);
cl
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cl
.
getNonbondedUtilities
().
computeInteractions
(
groups
,
includeForces
,
includeEnergy
);
double
sum
=
0.0
;
for
(
vector
<
OpenCLContext
::
ForcePostComputation
*>::
iterator
iter
=
cl
.
getPostComputations
().
begin
();
iter
!=
cl
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
b11af401
...
...
@@ -373,14 +373,17 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
cl
::
Kernel
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernel
)
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
kernel
s
.
forceKernel
,
9
);
context
.
executeKernel
(
kernel
s
.
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
...
...
@@ -406,12 +409,25 @@ bool OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
return
true
;
...
...
@@ -432,10 +448,21 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
forceRebuildNeighborList
=
true
;
}
...
...
@@ -453,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
);
kernels
.
source
=
source
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -524,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -623,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
b11af401
...
...
@@ -34,7 +34,7 @@ __kernel void computeNonbonded(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
#else
force.xyz -= dEdR1.xyz;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
...
...
@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
...
...
@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
...
...
@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real
tempEnergy
=
0
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_SYMMETRIC
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
...
...
@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
ifdef
PRUNE_BY_CUTOFF
}
#
endif
...
...
@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
//
Write
results.
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_CUTOFF
unsigned
int
atom2
=
atomIndices[get_local_id
(
0
)
]
;
#
else
...
...
@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz
+=
force.xyz
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
forceBuffers[offset2]
+=
(
real4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
#
endif
#
endif
}
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
#
ifdef
INCLUDE_ENERGY
energyBuffer[get_global_id
(
0
)
]
+=
(
real
)
energy
;
#
endif
}
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b11af401
...
...
@@ -2407,7 +2407,7 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
prepareKernel
,
prepareArgs
,
cu
.
getPaddedNumAtoms
());
nonbonded
->
prepareInteractions
(
1
);
nonbonded
->
computeInteractions
(
1
);
nonbonded
->
computeInteractions
(
1
,
includeForces
,
includeEnergy
);
void
*
spreadArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
tempForces
->
getDevicePointer
(),
&
bondReductionAtoms
->
getDevicePointer
(),
&
bondReductionFactors
->
getDevicePointer
()};
cu
.
executeKernel
(
spreadKernel
,
spreadArgs
,
cu
.
getPaddedNumAtoms
());
tempPosq
->
copyTo
(
cu
.
getPosq
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment