Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ecc8e00
"plugins/vscode:/vscode.git/clone" did not exist on "af51d7518132cd329b986cef57520fcc5ddbf7d5"
Commit
5ecc8e00
authored
May 25, 2016
by
Peter Eastman
Browse files
OpenCL version of GayBerneForce requires 64 bit atomics
parent
f1ca1e6d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
66 deletions
+6
-66
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+6
-10
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+0
-56
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
5ecc8e00
...
...
@@ -6051,6 +6051,8 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
}
void
OpenCLCalcGayBerneForceKernel
::
initialize
(
const
System
&
system
,
const
GayBerneForce
&
force
)
{
if
(
!
cl
.
getSupports64BitGlobalAtomics
())
throw
OpenMMException
(
"GayBerneForce requires a device that supports 64 bit atomic operations"
);
// Initialize interactions.
...
...
@@ -6123,12 +6125,9 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
neighborIndex
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
maxNeighborBlocks
,
"neighbors"
);
neighborBlockCount
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
1
,
"neighborBlockCount"
);
// Create array
s
for accumulating torques.
// Create array for accumulating torques.
if
(
cl
.
getSupports64BitGlobalAtomics
())
torque
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
3
*
cl
.
getPaddedNumAtoms
(),
"torque"
);
else
torque
=
new
OpenCLArray
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
4
*
elementSize
,
"torque"
);
torque
=
OpenCLArray
::
create
<
cl_long
>
(
cl
,
3
*
cl
.
getPaddedNumAtoms
(),
"torque"
);
cl
.
addAutoclearBuffer
(
*
torque
);
// Create the kernels.
...
...
@@ -6195,9 +6194,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
neighborBlockCount
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
13
,
exclusions
->
getDeviceBuffer
());
neighborsKernel
.
setArg
<
cl
::
Buffer
>
(
14
,
exclusionStartIndex
->
getDeviceBuffer
());
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
int
index
=
0
;
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
())
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getLongForceBuffer
().
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
torque
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_int
>
(
index
++
,
numRealParticles
);
forceKernel
.
setArg
<
cl_int
>
(
index
++
,
exceptionAtoms
.
size
());
...
...
@@ -6220,10 +6218,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
neighborBlockCount
->
getDeviceBuffer
());
}
index
=
0
;
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
().
getDeviceBuffer
()
:
cl
.
getForceBuffers
().
getDeviceBuffer
())
);
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getLongForceBuffer
().
getDeviceBuffer
());
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
torque
->
getDeviceBuffer
());
if
(
!
useLong
)
torqueKernel
.
setArg
<
cl_int
>
(
index
++
,
cl
.
getNumForceBuffers
());
torqueKernel
.
setArg
<
cl_int
>
(
index
++
,
numRealParticles
);
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
torqueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
axisParticleIndices
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/gayBerne.cl
View file @
5ecc8e00
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
#
endif
#
define
TILE_SIZE
32
#
define
NEIGHBOR_BLOCK_SIZE
32
...
...
@@ -343,11 +341,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
*
Compute
the
interactions.
*/
__kernel
void
computeForce
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
torqueBuffers,
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
#
endif
int
numAtoms,
int
numExceptions,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
...
...
@@ -395,31 +389,19 @@ __kernel void computeForce(
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
#
else
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
...
...
@@ -455,31 +437,19 @@ __kernel void computeForce(
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
#
endif
...
...
@@ -501,7 +471,6 @@ __kernel void computeForce(
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
computeOneInteraction
(
&data1,
&data2,
params.x,
params.y,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
...
...
@@ -514,14 +483,6 @@ __kernel void computeForce(
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
...
...
@@ -533,11 +494,7 @@ __kernel void computeForce(
*
Convert
the
torques
to
forces
on
the
connected
particles.
*/
__kernel
void
applyTorques
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
torqueBuffers,
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
int
numBuffers,
#
endif
int
numParticles,
__global
const
real4*
restrict
posq,
__global
int2*
const
restrict
axisParticleIndices,
__global
const
int*
sortedParticles
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
...
...
@@ -548,14 +505,8 @@ __kernel void applyTorques(
if
(
axisParticles.x
!=
-1
)
{
//
Load
the
torque.
#
ifdef
SUPPORTS_64_BIT_ATOMICS
real
scale
=
1/
(
real
)
0x100000000
;
real3
torque
=
(
real3
)
(
scale*torqueBuffers[originalIndex],
scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS],
scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]
)
;
#
else
real3
torque
=
0
;
for
(
int
i
=
0
; i < numBuffers; i++)
torque
+=
torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz
;
#
endif
real3
force
=
0
,
xforce
=
0
,
yforce
=
0
;
//
Apply
a
force
to
the
x
particle.
...
...
@@ -576,7 +527,6 @@ __kernel void applyTorques(
yforce
+=
f
;
force
-=
f
;
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[originalIndex],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS],
(
long
)
(
force.z*0x100000000
))
;
...
...
@@ -588,12 +538,6 @@ __kernel void applyTorques(
atom_add
(
&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS],
(
long
)
(
yforce.y*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS],
(
long
)
(
yforce.z*0x100000000
))
;
}
#
else
forceBuffers[originalIndex
+
warp*PADDED_NUM_ATOMS].xyz
+=
force.xyz
;
forceBuffers[axisParticles.x
+
warp*PADDED_NUM_ATOMS].xyz
+=
xforce.xyz
;
if
(
axisParticles.y
!=
-1
)
forceBuffers[axisParticles.y
+
warp*PADDED_NUM_ATOMS].xyz
+=
yforce.xyz
;
#
endif
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment