Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e43f14bf
Commit
e43f14bf
authored
Oct 21, 2009
by
Peter Eastman
Browse files
Created OpenCL implementation of NonbondedForce that works with non-Nvidia processors
parent
2529b922
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
201 additions
and
22 deletions
+201
-22
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+5
-1
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+7
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+9
-8
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+167
-0
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+13
-13
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
e43f14bf
...
@@ -64,8 +64,12 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -64,8 +64,12 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
compilationOptions
=
"-cl-fast-relaxed-math"
;
compilationOptions
=
"-cl-fast-relaxed-math"
;
string
vendor
=
device
.
getInfo
<
CL_DEVICE_VENDOR
>
();
string
vendor
=
device
.
getInfo
<
CL_DEVICE_VENDOR
>
();
if
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
)
if
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
)
{
compilationOptions
+=
" -DWARPS_ARE_ATOMIC"
;
compilationOptions
+=
" -DWARPS_ARE_ATOMIC"
;
simdWidth
=
32
;
}
else
simdWidth
=
1
;
queue
=
cl
::
CommandQueue
(
context
,
device
);
queue
=
cl
::
CommandQueue
(
context
,
device
);
numAtoms
=
numParticles
;
numAtoms
=
numParticles
;
paddedNumAtoms
=
TileSize
*
((
numParticles
+
TileSize
-
1
)
/
TileSize
);
paddedNumAtoms
=
TileSize
*
((
numParticles
+
TileSize
-
1
)
/
TileSize
);
...
...
platforms/opencl/src/OpenCLContext.h
View file @
e43f14bf
...
@@ -251,6 +251,12 @@ public:
...
@@ -251,6 +251,12 @@ public:
int
getNumForceBuffers
()
const
{
int
getNumForceBuffers
()
const
{
return
numForceBuffers
;
return
numForceBuffers
;
}
}
/**
* Get the SIMD width of the device being used.
*/
int
getSIMDWidth
()
const
{
return
simdWidth
;
}
/**
/**
* Get the OpenCLIntegrationUtilities for this context.
* Get the OpenCLIntegrationUtilities for this context.
*/
*/
...
@@ -281,6 +287,7 @@ private:
...
@@ -281,6 +287,7 @@ private:
int
numAtomBlocks
;
int
numAtomBlocks
;
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
std
::
string
compilationOptions
;
std
::
string
compilationOptions
;
cl
::
Context
context
;
cl
::
Context
context
;
cl
::
Device
device
;
cl
::
Device
device
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
e43f14bf
...
@@ -346,7 +346,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -346,7 +346,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
padded
<<
context
.
getPaddedNumAtoms
();
padded
<<
context
.
getPaddedNumAtoms
();
defines
[
"NUM_ATOMS"
]
=
natom
.
str
();
defines
[
"NUM_ATOMS"
]
=
natom
.
str
();
defines
[
"PADDED_NUM_ATOMS"
]
=
padded
.
str
();
defines
[
"PADDED_NUM_ATOMS"
]
=
padded
.
str
();
cl
::
Program
program
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"nonbonded.cl"
,
replacements
),
defines
);
string
filename
=
(
context
.
getSIMDWidth
()
==
32
?
"nonbonded_nvidia.cl"
:
"nonbonded_default.cl"
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
filename
,
replacements
),
defines
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
// Set arguments to the Kernel.
// Set arguments to the Kernel.
...
@@ -358,17 +359,17 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -358,17 +359,17 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
9
;
kernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
10
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
paramBase
=
11
;
paramBase
=
11
;
kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
(
10
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
}
}
else
{
else
{
kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
8
,
tiles
->
getSize
());
kernel
.
setArg
<
cl_uint
>
(
9
,
tiles
->
getSize
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
params
[
i
].
getBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
params
[
i
].
getBuffer
());
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
0 → 100644
View file @
e43f14bf
const
unsigned
int
TileSize
=
32
;
/**
*
Compute
nonbonded
interactions.
*/
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
float4*
local_posq,
__local
float4*
local_force,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
unsigned
int
numTiles
#
endif
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
*TileSize
;
bool
hasExclusions
=
(
x
&
0x1
)
;
x
=
(
x>>17
)
*TileSize
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TileSize
?
0
:
TileSize/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TileSize-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TileSize/2
?
0
:
TileSize
)
;
unsigned
int
atom1
=
x
+
tgx
;
float4
force
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TileSize-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
>>
baseLocalAtom
;
#
endif
for
(
unsigned
int
j
=
0
; j < TileSize/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
COMPUTE_INTERACTION
energy
+=
0.5f*tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
excl
>>=
1
;
}
//
Sum
the
forces
and
write
results.
if
(
get_local_id
(
0
)
>=
TileSize
)
tempBuffer[get_local_id
(
0
)
]
=
force
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TileSize
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset].xyz
+=
force.xyz+tempBuffer[get_local_id
(
0
)
+TileSize].xyz
;
}
}
else
{
//
This
is
an
off-diagonal
tile.
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TileSize
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_force[get_local_id
(
0
)
]
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
!hasExclusions
&&
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
else
#
endif
{
//
Compute
the
full
set
of
interactions
in
this
tile.
unsigned
int
xi
=
x/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TileSize-yi*
(
yi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TileSize
-
tgx
))
;
excl
>>=
baseLocalAtom
;
#
endif
unsigned
int
tj
=
tgx%
(
TileSize/2
)
;
for
(
unsigned
int
j
=
0
; j < TileSize/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
local_force[baseLocalAtom+tj+forceBufferOffset].xyz
+=
delta.xyz
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
excl
>>=
1
;
tj
=
(
tj+1
)
%
(
TileSize/2
)
;
}
}
//
Sum
the
forces
and
write
results.
if
(
get_local_id
(
0
)
>=
TileSize
)
tempBuffer[get_local_id
(
0
)
]
=
force
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TileSize
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz+tempBuffer[get_local_id
(
0
)
+TileSize].xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz+local_force[get_local_id
(
0
)
+TileSize].xyz
;
}
lasty
=
y
;
}
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
platforms/opencl/src/kernels/nonbonded.cl
→
platforms/opencl/src/kernels/nonbonded
_nvidia
.cl
View file @
e43f14bf
...
@@ -4,10 +4,10 @@ const unsigned int TileSize = 32;
...
@@ -4,10 +4,10 @@ const unsigned int TileSize = 32;
*
Compute
nonbonded
interactions.
*
Compute
nonbonded
interactions.
*/
*/
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
float4*
local_posq,
__local
float4*
local_force,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionIndices,
__local
float4*
local_posq,
__local
float4*
local_force,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
,
__local
float4*
tempBuffer
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -30,7 +30,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -30,7 +30,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
x
=
(
x>>17
)
*TileSize
;
x
=
(
x>>17
)
*TileSize
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TileSize-1
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TileSize-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
atom1
=
x
+
tgx
;
unsigned
int
atom1
=
x
+
tgx
;
float4
force
=
0.0f
;
float4
force
=
0.0f
;
float4
posq1
=
posq[atom1]
;
float4
posq1
=
posq[atom1]
;
...
@@ -49,7 +48,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -49,7 +48,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
@@ -58,8 +59,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -58,8 +59,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -100,7 +99,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -100,7 +99,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
bool
isExcluded
=
false
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
@@ -109,8 +110,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -109,8 +110,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
atom2
=
y+j
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
@@ -149,11 +148,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -149,11 +148,14 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TileSize
-
tgx
))
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TileSize
-
tgx
))
;
#
endif
#
endif
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
int
atom2
=
tbx+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
@@ -162,8 +164,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -162,8 +164,6 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
int
atom2
=
tbx+tj
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
atom2
=
y+tj
;
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment