Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3250fb43
Commit
3250fb43
authored
May 24, 2016
by
peastman
Browse files
Continuing OpenCL implementation of GayBerneForce
parent
522e133e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
175 additions
and
41 deletions
+175
-41
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+22
-7
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+153
-34
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
3250fb43
...
...
@@ -6120,7 +6120,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex
=
OpenCLArray
::
create
<
mm
_int
2
>
(
cl
,
maxNeighborBlocks
,
"neighbors"
);
neighborIndex = OpenCLArray::create<
cl
_int>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques.
...
...
@@ -6128,7 +6128,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
if (cl.getSupports64BitGlobalAtomics())
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
else
torque
=
new
OpenCLArray
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
elementSize
,
"torque"
);
torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(),
4*
elementSize, "torque");
cl.addAutoclearBuffer(*torque);
// Create the kernels.
...
...
@@ -6138,14 +6138,20 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
bool usePeriodic = (nonbondedMethod == GayBerneForce::CutoffPeriodic);
map<string, string> defines;
defines["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
if (useCutoff) {
defines["USE_CUTOFF"] = 1;
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance());
defines
[
"SWITCH_C3"
]
=
cl
.
doubleToString
(
10
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
3.0
));
defines
[
"SWITCH_C4"
]
=
cl
.
doubleToString
(
15
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
4.0
));
defines
[
"SWITCH_C5"
]
=
cl
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
()
,
5.0
));
defines["SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-
cutoff
, 3.0));
defines["SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-
cutoff
, 4.0));
defines["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-
cutoff
, 5.0));
}
}
if (!cl.getSupports64BitGlobalAtomics()) {
...
...
@@ -6189,6 +6195,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
...
...
@@ -6207,6 +6215,12 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
if (nonbondedMethod != GayBerneForce::NoCutoff) {
forceKernel.setArg<cl_int>(index++, maxNeighborBlocks);
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount->getDeviceBuffer());
}
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
...
...
@@ -6223,6 +6237,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles);
setPeriodicBoxArgs(cl, forceKernel, 20);
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles);
...
...
@@ -6317,8 +6332,8 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
vector<vector<int> > excludedAtoms(numRealParticles);
for (int i = 0; i < excludedPairs.size(); i++) {
int
first
=
min
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
);
int
second
=
max
(
excludedPairs
[
i
].
first
,
excludedPairs
[
i
].
second
);
int first =
inverseOrder[
min(excludedPairs[i].first, excludedPairs[i].second)
]
;
int second =
inverseOrder[
max(excludedPairs[i].first, excludedPairs[i].second)
]
;
excludedAtoms[first].push_back(second);
}
int index = 0;
...
...
platforms/opencl/src/kernels/gayBerne.cl
View file @
3250fb43
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
define
TILE_SIZE
32
#
define
NEIGHBOR_BLOCK_SIZE
32
/**
*
Calculate
the
ellipsoid
coordinate
frames
and
associated
matrices.
...
...
@@ -103,12 +104,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
*neighborBlockCount
=
0
;
}
/**
*
This
is
called
by
findNeighbors
()
to
write
a
block
to
the
neighbor
list.
*/
void
storeNeighbors
(
int
atom1,
int*
neighborBuffer,
int
numAtomsInBuffer,
int
maxNeighborBlocks,
__global
int*
restrict
neighbors,
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount
)
{
int
blockIndex
=
atom_add
(
neighborBlockCount,
1
)
;
if
(
blockIndex
>=
maxNeighborBlocks
)
return
; // We don't have enough room for the neighbor list.
neighborIndex[blockIndex]
=
atom1
;
int
baseIndex
=
blockIndex*NEIGHBOR_BLOCK_SIZE
;
for
(
int
i
=
0
; i < numAtomsInBuffer; i++)
neighbors[baseIndex+i]
=
neighborBuffer[i]
;
for
(
int
i
=
numAtomsInBuffer
; i < NEIGHBOR_BLOCK_SIZE; i++)
neighbors[baseIndex+i]
=
-1
;
}
/**
*
Build
a
list
of
neighbors
for
each
atom.
*/
__kernel
void
findNeighbors
(
int
numAtoms,
int
maxNeighborBlocks,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
neighbors,
__global
int2*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount
)
{
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
)
{
const
int
numBlocks
=
(
numAtoms+TILE_SIZE-1
)
/TILE_SIZE
;
int
neighborBuffer[NEIGHBOR_BLOCK_SIZE]
;
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
int
nextExclusion
=
exclusionStartIndex[atom1]
;
int
lastExclusion
=
exclusionStartIndex[atom1+1]
;
real4
pos
=
sortedPos[atom1]
;
int
nextBufferIndex
=
0
;
//
Loop
over
atom
blocks
and
compute
the
distance
of
this
atom
from
each
one
's
bounding
box.
for
(
int
block
=
(
atom1+1
)
/TILE_SIZE
; block < numBlocks; block++) {
real4
center
=
blockCenter[block]
;
real4
blockSize
=
blockBoundingBox[block]
;
real4
blockDelta
=
center-pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize.y
)
;
blockDelta.z
=
max
((
real
)
0
,
fabs
(
blockDelta.z
)
-blockSize.z
)
;
if
(
blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z
>=
CUTOFF_SQUARED
)
continue
;
//
Loop
over
atoms
within
this
block.
int
first
=
max
(
block*TILE_SIZE,
atom1+1
)
;
int
last
=
min
((
block+1
)
*TILE_SIZE,
numAtoms
)
;
for
(
int
atom2
=
first
; atom2 < last; atom2++) {
//
Skip
over
excluded
interactions.
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
>=
atom2
)
{
nextExclusion++
;
continue
;
}
real4
delta
=
pos-sortedPos[atom2]
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
neighborBuffer[nextBufferIndex++]
=
atom2
;
if
(
nextBufferIndex
==
NEIGHBOR_BLOCK_SIZE
)
{
storeNeighbors
(
atom1,
neighborBuffer,
nextBufferIndex,
maxNeighborBlocks,
neighbors,
neighborIndex,
neighborBlockCount
)
;
nextBufferIndex
=
0
;
}
}
}
}
if
(
nextBufferIndex
>
0
)
storeNeighbors
(
atom1,
neighborBuffer,
nextBufferIndex,
maxNeighborBlocks,
neighbors,
neighborIndex,
neighborBlockCount
)
;
}
}
typedef
struct
{
...
...
@@ -184,11 +252,11 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
//
Compute
the
switching
function.
real
switchValue
=
1
,
switchDeriv
=
0
;
#
if
USE_
LJ_
SWITCH
if
(
r
>
LJ_
SWITCH_CUTOFF
)
{
real
x
=
r-
LJ_
SWITCH_CUTOFF
;
switchValue
=
1+x*x*x*
(
LJ_
SWITCH_C3+x*
(
LJ_
SWITCH_C4+x*
LJ_
SWITCH_C5
))
;
switchDeriv
=
x*x*
(
3*
LJ_
SWITCH_C3+x*
(
4*
LJ_
SWITCH_C4+x*5*
LJ_
SWITCH_C5
))
;
#
if
USE_SWITCH
if
(
r
>
SWITCH_CUTOFF
)
{
real
x
=
r-SWITCH_CUTOFF
;
switchValue
=
1+x*x*x*
(
SWITCH_C3+x*
(
SWITCH_C4+x*SWITCH_C5
))
;
switchDeriv
=
x*x*
(
3*SWITCH_C3+x*
(
4*SWITCH_C4+x*5*SWITCH_C5
))
;
}
#
endif
...
...
@@ -265,7 +333,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
d[2][2]
=
scale[2]*
(
a[2][0]*
(
G12[0][1]*G12[1][2]
+
G12[2][1]*G12[1][0]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[2][1]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
2*a[2][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
real3
detadq
;
real3
detadq
=
0
;
for
(
int
i
=
0
; i < 3; i++)
detadq
+=
cross
((
real3
)
(
a[i][0],
a[i][1],
a[i][2]
)
,
(
real3
)
(
d[i][0],
d[i][1],
d[i][2]
))
;
real3
torque
=
(
dchidq*
(
u*eta
)
+
detadq*
(
u*chi
)
+
dudq*
(
eta*chi
))
*switchValue
;
...
...
@@ -289,13 +357,72 @@ __kernel void computeForce(
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex,
__global
const
int4*
restrict
exceptionParticles,
__global
const
float2*
restrict
exceptionParams
#
ifdef
USE_CUTOFF
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
,
int
maxNeighborBlocks,
__global
int*
restrict
neighbors,
__global
int*
restrict
neighborIndex,
__global
int*
restrict
neighborBlockCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
endif
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
mixed
energy
=
0
;
#
ifdef
USE_CUTOFF
const
int
numBlocks
=
*neighborBlockCount
;
for
(
int
block
=
get_global_id
(
0
)
; block < numBlocks; block += get_global_size(0)) {
//
Load
parameters
for
atom1.
int
atom1
=
neighborIndex[block]
;
int
index1
=
sortedAtoms[atom1]
;
AtomData
data1
;
loadAtomData
(
&data1,
atom1,
index1,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force1
=
0.0f
;
real3
torque1
=
0.0f
;
for
(
int
indexInBlock
=
0
; indexInBlock < NEIGHBOR_BLOCK_SIZE; indexInBlock++) {
//
Load
parameters
for
atom2.
int
atom2
=
neighbors[NEIGHBOR_BLOCK_SIZE*block+indexInBlock]
;
if
(
atom2
==
-1
)
continue
;
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
real3
torque2
=
0.0f
;
//
Compute
the
interaction.
real3
delta
=
data1.pos-data2.pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
#
else
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
//
Load
parameters
for
atom1.
...
...
@@ -309,14 +436,14 @@ __kernel void computeForce(
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
//
Skip
over
excluded
interactions.
int
index2
=
sortedAtoms[atom2]
;
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
index2
)
{
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
atom2
)
{
nextExclusion++
;
continue
;
}
//
Load
parameters
for
atom2.
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
...
...
@@ -325,30 +452,21 @@ __kernel void computeForce(
//
Compute
the
interaction.
real3
delta
=
data1.pos-data2.pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
...
...
@@ -364,12 +482,13 @@ __kernel void computeForce(
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
#
endif
//
Now
compute
exceptions.
for
(
int
index
=
get_global_id
(
0
)
; index < numExceptions; index += get_global_size(0)) {
int4
atomIndices
=
exceptionParticles[index]
;
real
2
params
=
exceptionParams[index]
;
float
2
params
=
exceptionParams[index]
;
int
index1
=
atomIndices.x,
index2
=
atomIndices.y
;
int
atom1
=
atomIndices.z,
atom2
=
atomIndices.w
;
AtomData
data1,
data2
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment