Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3250fb43
Commit
3250fb43
authored
May 24, 2016
by
peastman
Browse files
Continuing OpenCL implementation of GayBerneForce
parent
522e133e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
175 additions
and
41 deletions
+175
-41
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+22
-7
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+153
-34
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
3250fb43
...
@@ -6120,7 +6120,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6120,7 +6120,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sortedPos
=
new
OpenCLArray
(
cl
,
numRealParticles
,
4
*
elementSize
,
"sortedPos"
);
sortedPos
=
new
OpenCLArray
(
cl
,
numRealParticles
,
4
*
elementSize
,
"sortedPos"
);
maxNeighborBlocks
=
numRealParticles
*
2
;
maxNeighborBlocks
=
numRealParticles
*
2
;
neighbors
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
maxNeighborBlocks
*
32
,
"neighbors"
);
neighbors
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
maxNeighborBlocks
*
32
,
"neighbors"
);
neighborIndex
=
OpenCLArray
::
create
<
mm
_int
2
>
(
cl
,
maxNeighborBlocks
,
"neighbors"
);
neighborIndex
=
OpenCLArray
::
create
<
cl
_int
>
(
cl
,
maxNeighborBlocks
,
"neighbors"
);
neighborBlockCount
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
1
,
"neighborBlockCount"
);
neighborBlockCount
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
1
,
"neighborBlockCount"
);
// Create arrays for accumulating torques.
// Create arrays for accumulating torques.
...
@@ -6128,7 +6128,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6128,7 +6128,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
if
(
cl
.
getSupports64BitGlobalAtomics
())
if
(
cl
.
getSupports64BitGlobalAtomics
())
torque
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
3
*
cl
.
getPaddedNumAtoms
(),
"torque"
);
torque
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
3
*
cl
.
getPaddedNumAtoms
(),
"torque"
);
else
else
torque
=
new
OpenCLArray
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
elementSize
,
"torque"
);
torque
=
new
OpenCLArray
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
4
*
elementSize
,
"torque"
);
cl
.
addAutoclearBuffer
(
*
torque
);
cl
.
addAutoclearBuffer
(
*
torque
);
// Create the kernels.
// Create the kernels.
...
@@ -6138,14 +6138,20 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6138,14 +6138,20 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
bool
usePeriodic
=
(
nonbondedMethod
==
GayBerneForce
::
CutoffPeriodic
);
bool
usePeriodic
=
(
nonbondedMethod
==
GayBerneForce
::
CutoffPeriodic
);
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"USE_SWITCH"
]
=
(
useCutoff
&&
force
.
getUseSwitchingFunction
()
?
"1"
:
"0"
);
defines
[
"USE_SWITCH"
]
=
(
useCutoff
&&
force
.
getUseSwitchingFunction
()
?
"1"
:
"0"
);
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
cutoff
*
cutoff
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
defines
[
"USE_CUTOFF"
]
=
1
;
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
// Compute the switching coefficients.
// Compute the switching coefficients.
if
(
force
.
getUseSwitchingFunction
())
{
if
(
force
.
getUseSwitchingFunction
())
{
defines
[
"SWITCH_CUTOFF"
]
=
cl
.
doubleToString
(
force
.
getSwitchingDistance
());
defines
[
"SWITCH_CUTOFF"
]
=
cl
.
doubleToString
(
force
.
getSwitchingDistance
());
defines
[
"SWITCH_C3"
]
=
cl
.
doubleToString
(
10
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
3.0
));
defines
[
"SWITCH_C3"
]
=
cl
.
doubleToString
(
10
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
3.0
));
defines
[
"SWITCH_C4"
]
=
cl
.
doubleToString
(
15
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
4.0
));
defines
[
"SWITCH_C4"
]
=
cl
.
doubleToString
(
15
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
4.0
));
defines
[
"SWITCH_C5"
]
=
cl
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
5.0
));
defines
[
"SWITCH_C5"
]
=
cl
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
cutoff
,
5.0
));
}
}
}
}
if
(
!
cl
.
getSupports64BitGlobalAtomics
())
{
if
(
!
cl
.
getSupports64BitGlobalAtomics
())
{
...
@@ -6189,6 +6195,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6189,6 +6195,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
neighbors
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
neighbors
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
11
,
neighborIndex
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
11
,
neighborIndex
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
neighborBlockCount
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
neighborBlockCount
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
exclusions
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
14
,
exclusionStartIndex
->
getDeviceBuffer
());
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
int
index
=
0
;
int
index
=
0
;
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
...
@@ -6207,6 +6215,12 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6207,6 +6215,12 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionStartIndex
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionStartIndex
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exceptionParticles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exceptionParticles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exceptionParams
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exceptionParams
->
getDeviceBuffer
());
if
(
nonbondedMethod
!=
GayBerneForce
::
NoCutoff
)
{
forceKernel
.
setArg
<
cl_int
>
(
index
++
,
maxNeighborBlocks
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
neighbors
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
neighborIndex
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
neighborBlockCount
->
getDeviceBuffer
());
}
index
=
0
;
index
=
0
;
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
()));
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
torque
->
getDeviceBuffer
());
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
torque
->
getDeviceBuffer
());
...
@@ -6223,6 +6237,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6223,6 +6237,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
if
(
nonbondedMethod
!=
GayBerneForce
::
NoCutoff
)
{
if
(
nonbondedMethod
!=
GayBerneForce
::
NoCutoff
)
{
setPeriodicBoxArgs
(
cl
,
neighborsKernel
,
2
);
setPeriodicBoxArgs
(
cl
,
neighborsKernel
,
2
);
cl
.
executeKernel
(
neighborsKernel
,
numRealParticles
);
cl
.
executeKernel
(
neighborsKernel
,
numRealParticles
);
setPeriodicBoxArgs
(
cl
,
forceKernel
,
20
);
}
}
cl
.
executeKernel
(
forceKernel
,
cl
.
getNonbondedUtilities
().
getNumForceThreadBlocks
()
*
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
cl
.
executeKernel
(
forceKernel
,
cl
.
getNonbondedUtilities
().
getNumForceThreadBlocks
()
*
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
cl
.
executeKernel
(
torqueKernel
,
numRealParticles
);
cl
.
executeKernel
(
torqueKernel
,
numRealParticles
);
...
@@ -6317,8 +6332,8 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
...
@@ -6317,8 +6332,8 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
vector
<
vector
<
int
>
>
excludedAtoms
(
numRealParticles
);
vector
<
vector
<
int
>
>
excludedAtoms
(
numRealParticles
);
for
(
int
i
=
0
;
i
<
excludedPairs
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
excludedPairs
.
size
();
i
++
)
{
int
first
=
min
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
);
int
first
=
inverseOrder
[
min
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
)
]
;
int
second
=
max
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
);
int
second
=
inverseOrder
[
max
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
)
]
;
excludedAtoms
[
first
].
push_back
(
second
);
excludedAtoms
[
first
].
push_back
(
second
);
}
}
int
index
=
0
;
int
index
=
0
;
...
...
platforms/opencl/src/kernels/gayBerne.cl
View file @
3250fb43
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
define
TILE_SIZE
32
#
define
TILE_SIZE
32
#
define
NEIGHBOR_BLOCK_SIZE
32
/**
/**
*
Calculate
the
ellipsoid
coordinate
frames
and
associated
matrices.
*
Calculate
the
ellipsoid
coordinate
frames
and
associated
matrices.
...
@@ -103,12 +104,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -103,12 +104,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
*neighborBlockCount
=
0
;
*neighborBlockCount
=
0
;
}
}
/**
*
This
is
called
by
findNeighbors
()
to
write
a
block
to
the
neighbor
list.
*/
void
storeNeighbors
(
int
atom1,
int*
neighborBuffer,
int
numAtomsInBuffer,
int
maxNeighborBlocks,
__global
int*
restrict
neighbors,
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount
)
{
int
blockIndex
=
atom_add
(
neighborBlockCount,
1
)
;
if
(
blockIndex
>=
maxNeighborBlocks
)
return
; // We don't have enough room for the neighbor list.
neighborIndex[blockIndex]
=
atom1
;
int
baseIndex
=
blockIndex*NEIGHBOR_BLOCK_SIZE
;
for
(
int
i
=
0
; i < numAtomsInBuffer; i++)
neighbors[baseIndex+i]
=
neighborBuffer[i]
;
for
(
int
i
=
numAtomsInBuffer
; i < NEIGHBOR_BLOCK_SIZE; i++)
neighbors[baseIndex+i]
=
-1
;
}
/**
/**
*
Build
a
list
of
neighbors
for
each
atom.
*
Build
a
list
of
neighbors
for
each
atom.
*/
*/
__kernel
void
findNeighbors
(
int
numAtoms,
int
maxNeighborBlocks,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__kernel
void
findNeighbors
(
int
numAtoms,
int
maxNeighborBlocks,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
neighbors,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
neighbors,
__global
int2*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount
)
{
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
)
{
const
int
numBlocks
=
(
numAtoms+TILE_SIZE-1
)
/TILE_SIZE
;
int
neighborBuffer[NEIGHBOR_BLOCK_SIZE]
;
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
int
nextExclusion
=
exclusionStartIndex[atom1]
;
int
lastExclusion
=
exclusionStartIndex[atom1+1]
;
real4
pos
=
sortedPos[atom1]
;
int
nextBufferIndex
=
0
;
//
Loop
over
atom
blocks
and
compute
the
distance
of
this
atom
from
each
one
's
bounding
box.
for
(
int
block
=
(
atom1+1
)
/TILE_SIZE
; block < numBlocks; block++) {
real4
center
=
blockCenter[block]
;
real4
blockSize
=
blockBoundingBox[block]
;
real4
blockDelta
=
center-pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize.y
)
;
blockDelta.z
=
max
((
real
)
0
,
fabs
(
blockDelta.z
)
-blockSize.z
)
;
if
(
blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z
>=
CUTOFF_SQUARED
)
continue
;
//
Loop
over
atoms
within
this
block.
int
first
=
max
(
block*TILE_SIZE,
atom1+1
)
;
int
last
=
min
((
block+1
)
*TILE_SIZE,
numAtoms
)
;
for
(
int
atom2
=
first
; atom2 < last; atom2++) {
//
Skip
over
excluded
interactions.
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
>=
atom2
)
{
nextExclusion++
;
continue
;
}
real4
delta
=
pos-sortedPos[atom2]
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
neighborBuffer[nextBufferIndex++]
=
atom2
;
if
(
nextBufferIndex
==
NEIGHBOR_BLOCK_SIZE
)
{
storeNeighbors
(
atom1,
neighborBuffer,
nextBufferIndex,
maxNeighborBlocks,
neighbors,
neighborIndex,
neighborBlockCount
)
;
nextBufferIndex
=
0
;
}
}
}
}
if
(
nextBufferIndex
>
0
)
storeNeighbors
(
atom1,
neighborBuffer,
nextBufferIndex,
maxNeighborBlocks,
neighbors,
neighborIndex,
neighborBlockCount
)
;
}
}
}
typedef
struct
{
typedef
struct
{
...
@@ -184,11 +252,11 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
...
@@ -184,11 +252,11 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
//
Compute
the
switching
function.
//
Compute
the
switching
function.
real
switchValue
=
1
,
switchDeriv
=
0
;
real
switchValue
=
1
,
switchDeriv
=
0
;
#
if
USE_
LJ_
SWITCH
#
if
USE_SWITCH
if
(
r
>
LJ_
SWITCH_CUTOFF
)
{
if
(
r
>
SWITCH_CUTOFF
)
{
real
x
=
r-
LJ_
SWITCH_CUTOFF
;
real
x
=
r-SWITCH_CUTOFF
;
switchValue
=
1+x*x*x*
(
LJ_
SWITCH_C3+x*
(
LJ_
SWITCH_C4+x*
LJ_
SWITCH_C5
))
;
switchValue
=
1+x*x*x*
(
SWITCH_C3+x*
(
SWITCH_C4+x*SWITCH_C5
))
;
switchDeriv
=
x*x*
(
3*
LJ_
SWITCH_C3+x*
(
4*
LJ_
SWITCH_C4+x*5*
LJ_
SWITCH_C5
))
;
switchDeriv
=
x*x*
(
3*SWITCH_C3+x*
(
4*SWITCH_C4+x*5*SWITCH_C5
))
;
}
}
#
endif
#
endif
...
@@ -265,7 +333,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
...
@@ -265,7 +333,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
d[2][2]
=
scale[2]*
(
a[2][0]*
(
G12[0][1]*G12[1][2]
+
G12[2][1]*G12[1][0]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
d[2][2]
=
scale[2]*
(
a[2][0]*
(
G12[0][1]*G12[1][2]
+
G12[2][1]*G12[1][0]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[2][1]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
a[2][1]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
2*a[2][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
2*a[2][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
real3
detadq
;
real3
detadq
=
0
;
for
(
int
i
=
0
; i < 3; i++)
for
(
int
i
=
0
; i < 3; i++)
detadq
+=
cross
((
real3
)
(
a[i][0],
a[i][1],
a[i][2]
)
,
(
real3
)
(
d[i][0],
d[i][1],
d[i][2]
))
;
detadq
+=
cross
((
real3
)
(
a[i][0],
a[i][1],
a[i][2]
)
,
(
real3
)
(
d[i][0],
d[i][1],
d[i][2]
))
;
real3
torque
=
(
dchidq*
(
u*eta
)
+
detadq*
(
u*chi
)
+
dudq*
(
eta*chi
))
*switchValue
;
real3
torque
=
(
dchidq*
(
u*eta
)
+
detadq*
(
u*chi
)
+
dudq*
(
eta*chi
))
*switchValue
;
...
@@ -289,13 +357,72 @@ __kernel void computeForce(
...
@@ -289,13 +357,72 @@ __kernel void computeForce(
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex,
__global
const
int4*
restrict
exceptionParticles,
__global
const
float2*
restrict
exceptionParams
__global
const
int4*
restrict
exceptionParticles,
__global
const
float2*
restrict
exceptionParams
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
int
maxNeighborBlocks,
__global
int*
restrict
neighbors,
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
endif
#
endif
)
{
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
mixed
energy
=
0
;
mixed
energy
=
0
;
#
ifdef
USE_CUTOFF
const
int
numBlocks
=
*neighborBlockCount
;
for
(
int
block
=
get_global_id
(
0
)
; block < numBlocks; block += get_global_size(0)) {
//
Load
parameters
for
atom1.
int
atom1
=
neighborIndex[block]
;
int
index1
=
sortedAtoms[atom1]
;
AtomData
data1
;
loadAtomData
(
&data1,
atom1,
index1,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force1
=
0.0f
;
real3
torque1
=
0.0f
;
for
(
int
indexInBlock
=
0
; indexInBlock < NEIGHBOR_BLOCK_SIZE; indexInBlock++) {
//
Load
parameters
for
atom2.
int
atom2
=
neighbors[NEIGHBOR_BLOCK_SIZE*block+indexInBlock]
;
if
(
atom2
==
-1
)
continue
;
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
real3
torque2
=
0.0f
;
//
Compute
the
interaction.
real3
delta
=
data1.pos-data2.pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
#
else
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
//
Load
parameters
for
atom1.
//
Load
parameters
for
atom1.
...
@@ -309,14 +436,14 @@ __kernel void computeForce(
...
@@ -309,14 +436,14 @@ __kernel void computeForce(
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
//
Skip
over
excluded
interactions.
//
Skip
over
excluded
interactions.
int
index2
=
sortedAtoms[atom2]
;
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
atom2
)
{
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
index2
)
{
nextExclusion++
;
nextExclusion++
;
continue
;
continue
;
}
}
//
Load
parameters
for
atom2.
//
Load
parameters
for
atom2.
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
real3
force2
=
0.0f
;
...
@@ -325,30 +452,21 @@ __kernel void computeForce(
...
@@ -325,30 +452,21 @@ __kernel void computeForce(
//
Compute
the
interaction.
//
Compute
the
interaction.
real3
delta
=
data1.pos-data2.pos
;
real3
delta
=
data1.pos-data2.pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
real
sigma
=
data1.sig.x+data2.sig.x
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
epsilon
=
data1.eps.x*data2.eps.x
;
#
endif
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
#
endif
}
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
...
@@ -364,12 +482,13 @@ __kernel void computeForce(
...
@@ -364,12 +482,13 @@ __kernel void computeForce(
torqueBuffers[offset].xyz
+=
torque1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
#
endif
}
}
#
endif
//
Now
compute
exceptions.
//
Now
compute
exceptions.
for
(
int
index
=
get_global_id
(
0
)
; index < numExceptions; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < numExceptions; index += get_global_size(0)) {
int4
atomIndices
=
exceptionParticles[index]
;
int4
atomIndices
=
exceptionParticles[index]
;
real
2
params
=
exceptionParams[index]
;
float
2
params
=
exceptionParams[index]
;
int
index1
=
atomIndices.x,
index2
=
atomIndices.y
;
int
index1
=
atomIndices.x,
index2
=
atomIndices.y
;
int
atom1
=
atomIndices.z,
atom2
=
atomIndices.w
;
int
atom1
=
atomIndices.z,
atom2
=
atomIndices.w
;
AtomData
data1,
data2
;
AtomData
data1,
data2
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment