Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
abadc821
Unverified
Commit
abadc821
authored
Dec 20, 2023
by
Peter Eastman
Committed by
GitHub
Dec 20, 2023
Browse files
Fixed errors in OpenCL on CPU (#4358)
parent
e854f8f3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
70 additions
and
52 deletions
+70
-52
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+1
-0
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+68
-9
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+0
-42
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
abadc821
...
@@ -83,7 +83,7 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
...
@@ -83,7 +83,7 @@ OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : con
// us from sorting atom blocks by size, which leads to a slightly less efficient neighbor
// us from sorting atom blocks by size, which leads to a slightly less efficient neighbor
// list. We guess based on system size which will be faster.
// list. We guess based on system size which will be faster.
useLargeBlocks
=
(
context
.
getNumAtoms
()
>
100000
);
useLargeBlocks
=
(
!
deviceIsCpu
&&
context
.
getNumAtoms
()
>
100000
);
std
::
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
std
::
string
vendor
=
context
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
isAMD
=
!
deviceIsCpu
&&
((
vendor
.
size
()
>=
3
&&
vendor
.
substr
(
0
,
3
)
==
"AMD"
)
||
(
vendor
.
size
()
>=
28
&&
vendor
.
substr
(
0
,
28
)
==
"Advanced Micro Devices, Inc."
));
isAMD
=
!
deviceIsCpu
&&
((
vendor
.
size
()
>=
3
&&
vendor
.
substr
(
0
,
3
)
==
"AMD"
)
||
(
vendor
.
size
()
>=
28
&&
vendor
.
substr
(
0
,
28
)
==
"Advanced Micro Devices, Inc."
));
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
abadc821
...
@@ -66,6 +66,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -66,6 +66,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
rebuildNeighborList[0]
=
0
;
rebuildNeighborList[0]
=
0
;
}
}
__kernel
void
computeSortKeys
(
__global
const
real4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
sortedBlocks,
__global
real2*
restrict
blockSizeRange,
int
numSizes
)
{
__kernel
void
computeSortKeys
(
__global
const
real4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
sortedBlocks,
__global
real2*
restrict
blockSizeRange,
int
numSizes
)
{
//
Find
the
total
range
of
sizes
recorded
by
all
blocks.
//
Find
the
total
range
of
sizes
recorded
by
all
blocks.
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
abadc821
...
@@ -7,9 +7,10 @@
...
@@ -7,9 +7,10 @@
*/
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
rebuildNeighborList,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
rebuildNeighborList,
__global
real2*
restrict
sortedBlocks
)
{
__global
real2*
restrict
blockSizeRange
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
real
minSize
=
1e38
,
maxSize
=
0
;
while
(
base
<
numAtoms
)
{
while
(
base
<
numAtoms
)
{
real4
pos
=
posq[base]
;
real4
pos
=
posq[base]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
...
@@ -28,20 +29,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -28,20 +29,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
maxPos
=
max
(
maxPos,
pos
)
;
maxPos
=
max
(
maxPos,
pos
)
;
}
}
real4
blockSize
=
0.5f*
(
maxPos-minPos
)
;
real4
blockSize
=
0.5f*
(
maxPos-minPos
)
;
real4
center
=
0.5f*
(
maxPos+minPos
)
;
center.w
=
0
;
for
(
int
i
=
base
; i < last; i++) {
pos
=
posq[i]
;
real4
delta
=
posq[i]-center
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
center.w
=
max
(
center.w,
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
)
;
}
center.w
=
sqrt
(
center.w
)
;
blockBoundingBox[index]
=
blockSize
;
blockBoundingBox[index]
=
blockSize
;
blockCenter[index]
=
0.5f*
(
maxPos+minPos
)
;
blockCenter[index]
=
center
;
sortedBlocks[index]
=
(
real2
)
(
blockSize.x+blockSize.y+blockSize.z,
index
)
;
real
totalSize
=
blockSize.x+blockSize.y+blockSize.z
;
minSize
=
min
(
minSize,
totalSize
)
;
maxSize
=
max
(
maxSize,
totalSize
)
;
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
base
=
index*TILE_SIZE
;
}
}
//
Record
the
range
of
sizes
seen
by
threads
in
this
block.
__local
real
minBuffer[64],
maxBuffer[64]
;
minBuffer[get_local_id
(
0
)
]
=
minSize
;
maxBuffer[get_local_id
(
0
)
]
=
maxSize
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
step
=
1
; step < 64; step *= 2) {
if
(
get_local_id
(
0
)
+step
<
64
&&
get_local_id
(
0
)
%
(
2*step
)
==
0
)
{
minBuffer[get_local_id
(
0
)
]
=
min
(
minBuffer[get_local_id
(
0
)
],
minBuffer[get_local_id
(
0
)
+step]
)
;
maxBuffer[get_local_id
(
0
)
]
=
max
(
maxBuffer[get_local_id
(
0
)
],
maxBuffer[get_local_id
(
0
)
+step]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
get_local_id
(
0
)
==
0
)
blockSizeRange[get_group_id
(
0
)
]
=
make_real2
(
minBuffer[0],
maxBuffer[0]
)
;
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
rebuildNeighborList[0]
=
0
;
rebuildNeighborList[0]
=
0
;
}
}
/**
 * Build one sort key per atom block.  Each key combines a size bin (one of 20,
 * spaced evenly in the logarithm of the block size) with the block's index.
 *
 * blockBoundingBox  per-block box; the sum of its x, y and z components is the size measure
 * sortedBlocks      output, one key per block
 * blockSizeRange    (min, max) size pairs; the first numSizes entries are merged
 * numSizes          number of entries of blockSizeRange to read
 */
__kernel void computeSortKeys(__global const real4* restrict blockBoundingBox, __global unsigned int* restrict sortedBlocks,
        __global real2* restrict blockSizeRange, int numSizes) {
    // The first work-item of each group merges every recorded (min, max) pair
    // and publishes the logarithm of both limits through local memory.

    __local real2 sizeRange;
    if (get_local_id(0) == 0) {
        real2 merged = blockSizeRange[0];
        for (int entry = 1; entry < numSizes; entry++) {
            real2 candidate = blockSizeRange[entry];
            merged.x = min(merged.x, candidate.x);
            merged.y = max(merged.y, candidate.y);
        }
        merged.x = LOG(merged.x);
        merged.y = LOG(merged.y);
        sizeRange = merged;
    }

    // All work-items wait here so that they read the finished range.

    barrier(CLK_LOCAL_MEM_FENCE);

    // Key layout: the bin is shifted left by BIN_SHIFT and the block index is
    // added to it.

    const int numSizeBins = 20;
    const real logMinSize = sizeRange.x;
    const real binsPerLogUnit = numSizeBins/(sizeRange.y-logMinSize);
    for (unsigned int block = get_global_id(0); block < NUM_BLOCKS; block += get_global_size(0)) {
        const real4 box = blockBoundingBox[block];
        const real logSize = LOG(box.x+box.y+box.z);
        int bin = (logSize-logMinSize)*binsPerLogUnit;

        // Keep the bin inside [0, numSizeBins-1].

        bin = min(bin, numSizeBins-1);
        bin = max(0, bin);
        sortedBlocks[block] = (((unsigned int) bin)<<BIN_SHIFT) + block;
    }
}
/**
/**
*
Sort
the
data
about
bounding
boxes
so
it
can
be
accessed
more
efficiently
in
the
next
kernel.
*
Sort
the
data
about
bounding
boxes
so
it
can
be
accessed
more
efficiently
in
the
next
kernel.
*/
*/
__kernel
void
sortBoxData
(
__global
const
real2
*
restrict
sortedBlock,
__global
const
real4*
restrict
blockCenter,
__kernel
void
sortBoxData
(
__global
const
unsigned
int
*
restrict
sortedBlock
s
,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
real4*
restrict
sortedBlockCenter,
__global
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
oldPositions,
__global
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
oldPositions,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList,
int
forceRebuild
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList,
int
forceRebuild
...
@@ -51,7 +111,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
...
@@ -51,7 +111,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
#
endif
#
endif
)
{
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_BLOCKS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_BLOCKS; i += get_global_size(0)) {
int
index
=
(
int
)
sortedBlock[i]
.y
;
unsigned
int
index
=
sortedBlock
s
[i]
&
BLOCK_INDEX_MASK
;
sortedBlockCenter[i]
=
blockCenter[index]
;
sortedBlockCenter[i]
=
blockCenter[index]
;
sortedBlockBoundingBox[i]
=
blockBoundingBox[index]
;
sortedBlockBoundingBox[i]
=
blockBoundingBox[index]
;
}
}
...
@@ -166,7 +226,7 @@ void storeInteractionData(int x, int* buffer, int* atoms, int* numAtoms, int num
...
@@ -166,7 +226,7 @@ void storeInteractionData(int x, int* buffer, int* atoms, int* numAtoms, int num
*/
*/
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
unsigned
int
numBlocks,
__global
real2
*
restrict
sortedBlocks,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
unsigned
int
numBlocks,
__global
unsigned
int
*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
int*
restrict
rebuildNeighborList
__global
const
int*
restrict
rebuildNeighborList
...
@@ -187,8 +247,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -187,8 +247,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
for
(
int
i
=
startBlockIndex+get_group_id
(
0
)
; i < startBlockIndex+numBlocks; i += get_num_groups(0)) {
for
(
int
i
=
startBlockIndex+get_group_id
(
0
)
; i < startBlockIndex+numBlocks; i += get_num_groups(0)) {
valuesInBuffer
=
0
;
valuesInBuffer
=
0
;
numAtoms
=
0
;
numAtoms
=
0
;
real2
sortedKey
=
sortedBlocks[i]
;
int
x
=
sortedBlocks[i]
&
BLOCK_INDEX_MASK
;
int
x
=
(
int
)
sortedKey.y
;
real4
blockCenterX
=
sortedBlockCenter[i]
;
real4
blockCenterX
=
sortedBlockCenter[i]
;
real4
blockSizeX
=
sortedBlockBoundingBox[i]
;
real4
blockSizeX
=
sortedBlockBoundingBox[i]
;
...
@@ -204,7 +263,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -204,7 +263,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
for
(
int
j
=
i+1
; j < NUM_BLOCKS; j++) {
for
(
int
j
=
i+1
; j < NUM_BLOCKS; j++) {
real2
sortedKey2
=
sortedBlocks[j]
;
real2
sortedKey2
=
sortedBlocks[j]
;
int
y
=
(
int
)
sortedKey2.y
;
int
y
=
sortedBlocks[j]
&
BLOCK_INDEX_MASK
;
bool
hasExclusions
=
false
;
bool
hasExclusions
=
false
;
for
(
int
k
=
0
; k < numExclusions; k++)
for
(
int
k
=
0
; k < numExclusions; k++)
hasExclusions
|
=
(
exclusionsForX[k]
==
y
)
;
hasExclusions
|
=
(
exclusionsForX[k]
==
y
)
;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
abadc821
...
@@ -10,11 +10,7 @@ typedef struct {
...
@@ -10,11 +10,7 @@ typedef struct {
*/
*/
__kernel
void
computeNonbonded
(
__kernel
void
computeNonbonded
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
forceBuffers,
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
int2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
long
numTileIndices
__global
const
int2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
long
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -102,14 +98,9 @@ __kernel void computeNonbonded(
...
@@ -102,14 +98,9 @@ __kernel void computeNonbonded(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
}
}
}
}
else {
else {
...
@@ -178,32 +169,18 @@ __kernel void computeNonbonded(
...
@@ -178,32 +169,18 @@ __kernel void computeNonbonded(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
}
}
// Write results.
// Write results.
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx;
unsigned int offset = y*TILE_SIZE + tgx;
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(localData[tgx].fx));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fy));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(localData[tgx].fz));
#else
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset];
f.x += localData[tgx].fx;
f.y += localData[tgx].fy;
f.z += localData[tgx].fz;
forceBuffers[offset] = f;
#endif
}
}
}
}
}
}
...
@@ -337,14 +314,9 @@ __kernel void computeNonbonded(
...
@@ -337,14 +314,9 @@ __kernel void computeNonbonded(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) realToFixedPoint(force.x));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
#endif
}
}
}
}
else
else
...
@@ -404,14 +376,9 @@ __kernel void computeNonbonded(
...
@@ -404,14 +376,9 @@ __kernel void computeNonbonded(
//
Write
results
for
atom1.
//
Write
results
for
atom1.
#
ifdef
SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&forceBuffers[atom1],
(
mm_ulong
)
realToFixedPoint
(
force.x
))
;
ATOMIC_ADD
(
&forceBuffers[atom1],
(
mm_ulong
)
realToFixedPoint
(
force.x
))
;
ATOMIC_ADD
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
force.y
))
;
ATOMIC_ADD
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
force.y
))
;
ATOMIC_ADD
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
force.z
))
;
ATOMIC_ADD
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
force.z
))
;
#
else
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
=
forceBuffers[offset].xyz+force.xyz
;
#
endif
}
}
}
}
...
@@ -424,18 +391,9 @@ __kernel void computeNonbonded(
...
@@ -424,18 +391,9 @@ __kernel void computeNonbonded(
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
#
endif
#
endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#
ifdef
SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&forceBuffers[atom2],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fx
))
;
ATOMIC_ADD
(
&forceBuffers[atom2],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fx
))
;
ATOMIC_ADD
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fy
))
;
ATOMIC_ADD
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fy
))
;
ATOMIC_ADD
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fz
))
;
ATOMIC_ADD
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
mm_ulong
)
realToFixedPoint
(
localData[tgx].fz
))
;
#
else
unsigned
int
offset
=
atom2
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers[offset]
;
f.x
+=
localData[tgx].fx
;
f.y
+=
localData[tgx].fy
;
f.z
+=
localData[tgx].fz
;
forceBuffers[offset]
=
f
;
#
endif
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment