Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6bea0a80
Commit
6bea0a80
authored
Dec 13, 2011
by
Peter Eastman
Browse files
Workarounds for bug in AMD compiler on Mac
parent
8ae2a598
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
49 additions
and
2 deletions
+49
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+2
-0
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+23
-2
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+19
-0
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+5
-0
No files found.
platforms/opencl/src/OpenCLContext.cpp
View file @
6bea0a80
...
@@ -130,6 +130,8 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
...
@@ -130,6 +130,8 @@ OpenCLContext::OpenCLContext(int numParticles, int platformIndex, int deviceInde
}
}
else
else
simdWidth
=
1
;
simdWidth
=
1
;
if
(
platforms
[
0
].
getInfo
<
CL_PLATFORM_VENDOR
>
()
==
"Apple"
&&
vendor
==
"AMD"
)
compilationDefines
[
"MAC_AMD_WORKAROUND"
]
==
""
;
if
(
supports64BitGlobalAtomics
)
if
(
supports64BitGlobalAtomics
)
compilationDefines
[
"SUPPORTS_64_BIT_ATOMICS"
]
=
""
;
compilationDefines
[
"SUPPORTS_64_BIT_ATOMICS"
]
=
""
;
if
(
supportsDoublePrecision
)
if
(
supportsDoublePrecision
)
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
6bea0a80
...
@@ -100,9 +100,20 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
...
@@ -100,9 +100,20 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
//
Load
an
atom
position
and
the
bounding
box
the
other
block.
//
Load
an
atom
position
and
the
bounding
box
the
other
block.
int
box
=
(
group
==
0
?
x
:
y
)
;
int
atom
=
(
group
==
0
?
y
:
x
)
*TILE_SIZE+index
;
#
ifdef
MAC_AMD_WORKAROUND
__global
float*
bc
=
(
__global
float*
)
blockCenter
;
__global
float*
bb
=
(
__global
float*
)
blockBoundingBox
;
__global
float*
ps
=
(
__global
float*
)
posq
;
center
=
(
float4
)
(
bc[4*box],
bc[4*box+1],
bc[4*box+2],
0.0f
)
;
boxSize
=
(
float4
)
(
bb[4*box],
bb[4*box+1],
bb[4*box+2],
0.0f
)
;
pos
=
(
float4
)
(
ps[4*atom],
ps[4*atom+1],
ps[4*atom+2],
0.0f
)
;
#
else
center
=
blockCenter[
(
group
==
0
?
x
:
y
)
]
;
center
=
blockCenter[
(
group
==
0
?
x
:
y
)
]
;
boxSize
=
blockBoundingBox[
(
group
==
0
?
x
:
y
)
]
;
boxSize
=
blockBoundingBox[
(
group
==
0
?
x
:
y
)
]
;
pos
=
posq[
(
group
==
0
?
y
:
x
)
*TILE_SIZE+index]
;
pos
=
posq[
(
group
==
0
?
y
:
x
)
*TILE_SIZE+index]
;
#
endif
//
Find
the
distance
of
the
atom
from
the
bounding
box.
//
Find
the
distance
of
the
atom
from
the
bounding
box.
...
@@ -182,14 +193,24 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -182,14 +193,24 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
//
Find
the
distance
between
the
bounding
boxes
of
the
two
cells.
//
Find
the
distance
between
the
bounding
boxes
of
the
two
cells.
#
ifdef
MAC_AMD_WORKAROUND
__global
float*
bc
=
(
__global
float*
)
blockCenter
;
__global
float*
bb
=
(
__global
float*
)
blockBoundingBox
;
float4
bcx
=
(
float4
)
(
bc[4*x],
bc[4*x+1],
bc[4*x+2],
0.0f
)
;
float4
bcy
=
(
float4
)
(
bc[4*y],
bc[4*y+1],
bc[4*y+2],
0.0f
)
;
float4
delta
=
bcx-bcy
;
float4
boxSizea
=
(
float4
)
(
bb[4*x],
bb[4*x+1],
bb[4*x+2],
0.0f
)
;
float4
boxSizeb
=
(
float4
)
(
bb[4*y],
bb[4*y+1],
bb[4*y+2],
0.0f
)
;
#
else
float4
delta
=
blockCenter[x]-blockCenter[y]
;
float4
delta
=
blockCenter[x]-blockCenter[y]
;
float4
boxSizea
=
blockBoundingBox[x]
;
float4
boxSizeb
=
blockBoundingBox[y]
;
#
endif
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
float4
boxSizea
=
blockBoundingBox[x]
;
float4
boxSizeb
=
blockBoundingBox[y]
;
delta.x
=
max
(
0.0f,
fabs
(
delta.x
)
-boxSizea.x-boxSizeb.x
)
;
delta.x
=
max
(
0.0f,
fabs
(
delta.x
)
-boxSizea.x-boxSizeb.x
)
;
delta.y
=
max
(
0.0f,
fabs
(
delta.y
)
-boxSizea.y-boxSizeb.y
)
;
delta.y
=
max
(
0.0f,
fabs
(
delta.y
)
-boxSizea.y-boxSizeb.y
)
;
delta.z
=
max
(
0.0f,
fabs
(
delta.z
)
-boxSizea.z-boxSizeb.z
)
;
delta.z
=
max
(
0.0f,
fabs
(
delta.z
)
-boxSizea.z-boxSizeb.z
)
;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
6bea0a80
...
@@ -278,10 +278,29 @@ __kernel void gridInterpolateForce(__global const float4* restrict posq, __globa
...
@@ -278,10 +278,29 @@ __kernel void gridInterpolateForce(__global const float4* restrict posq, __globa
float
gridvalue
=
pmeGrid[index].x
;
float
gridvalue
=
pmeGrid[index].x
;
force.x
+=
ddata[ix].x*data[iy].y*data[iz].z*gridvalue
;
force.x
+=
ddata[ix].x*data[iy].y*data[iz].z*gridvalue
;
force.y
+=
data[ix].x*ddata[iy].y*data[iz].z*gridvalue
;
force.y
+=
data[ix].x*ddata[iy].y*data[iz].z*gridvalue
;
#
ifndef
MAC_AMD_WORKAROUND
force.z
+=
data[ix].x*data[iy].y*ddata[iz].z*gridvalue
;
force.z
+=
data[ix].x*data[iy].y*ddata[iz].z*gridvalue
;
#
endif
}
}
}
}
}
}
#
ifdef
MAC_AMD_WORKAROUND
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
for
(
int
iy
=
0
; iy < PME_ORDER; iy++) {
int
yindex
=
gridIndex.y+iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
)
;
for
(
int
iz
=
0
; iz < PME_ORDER; iz++) {
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
float
gridvalue
=
pmeGrid[index].x
;
force.z
+=
data[ix].x*data[iy].y*ddata[iz].z*gridvalue
;
}
}
}
#
endif
float4
totalForce
=
forceBuffers[atom]
;
float4
totalForce
=
forceBuffers[atom]
;
float
q
=
pos.w*EPSILON_FACTOR
;
float
q
=
pos.w*EPSILON_FACTOR
;
totalForce.x
-=
q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x
;
totalForce.x
-=
q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x
;
...
...
platforms/opencl/src/kernels/sort.cl
View file @
6bea0a80
...
@@ -60,7 +60,12 @@ __kernel void assignElementsToBuckets(__global const TYPE* restrict data, int le
...
@@ -60,7 +60,12 @@ __kernel void assignElementsToBuckets(__global const TYPE* restrict data, int le
float
maxValue
=
dataRange.y
;
float
maxValue
=
dataRange.y
;
float
bucketWidth
=
(
maxValue-minValue
)
/numBuckets
;
float
bucketWidth
=
(
maxValue-minValue
)
/numBuckets
;
for
(
int
index
=
get_global_id
(
0
)
; index < length; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < length; index += get_global_size(0)) {
#
ifdef
MAC_AMD_WORKAROUND
__global
int*
d
=
(
__global
int*
)
data
;
int2
element
=
(
int2
)
(
d[2*index],
d[2*index+1]
)
;
#
else
TYPE
element
=
data[index]
;
TYPE
element
=
data[index]
;
#
endif
float
value
=
getValue
(
element
)
;
float
value
=
getValue
(
element
)
;
int
bucketIndex
=
min
((
int
)
((
value-minValue
)
/bucketWidth
)
,
numBuckets-1
)
;
int
bucketIndex
=
min
((
int
)
((
value-minValue
)
/bucketWidth
)
,
numBuckets-1
)
;
offsetInBucket[index]
=
atom_inc
(
&bucketOffset[bucketIndex]
)
;
offsetInBucket[index]
=
atom_inc
(
&bucketOffset[bucketIndex]
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment