Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
522e133e
Commit
522e133e
authored
May 20, 2016
by
peastman
Browse files
Continuing OpenCL implementation of GayBerneForce
parent
69cab567
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
221 additions
and
78 deletions
+221
-78
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+9
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+86
-63
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+126
-10
No files found.
platforms/opencl/include/OpenCLKernels.h
View file @
522e133e
...
...
@@ -1080,9 +1080,10 @@ private:
class
OpenCLCalcGayBerneForceKernel
:
public
CalcGayBerneForceKernel
{
public:
OpenCLCalcGayBerneForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGayBerneForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
aMatrix
(
NULL
),
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedPos
(
NULL
),
torque
(
NULL
)
{
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
exceptionParticles
(
NULL
),
exceptionParams
(
NULL
),
aMatrix
(
NULL
),
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighbors
(
NULL
),
neighborIndex
(
NULL
),
neighborBlockCount
(
NULL
),
sortedPos
(
NULL
),
torque
(
NULL
)
{
}
~
OpenCLCalcGayBerneForceKernel
();
/**
...
...
@@ -1112,7 +1113,7 @@ private:
void
sortAtoms
();
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
int
numRealParticles
;
int
numRealParticles
,
maxNeighborBlocks
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
OpenCLArray
*
sortedParticles
;
OpenCLArray
*
axisParticleIndices
;
...
...
@@ -1128,12 +1129,15 @@ private:
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
neighbors
;
OpenCLArray
*
neighborIndex
;
OpenCLArray
*
neighborBlockCount
;
OpenCLArray
*
sortedPos
;
OpenCLArray
*
torque
;
std
::
vector
<
bool
>
isRealParticle
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
excludedPairs
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
,
torqueKernel
;
};
/**
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
522e133e
...
...
@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
...
...
@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
numRealParticles = 0;
for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
...
...
@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
exclusionList[i].push_back(i);
isRealParticle[i] = (epsilon != 0.0);
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector);
// Record exceptions and exclusions.
vector<mm_float2> exceptionParamsVec;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (epsilon != 0.0) {
exceptionParamsVec.push_back(mm_float2((float) sigma, (float) epsilon));
exceptionAtoms.push_back(make_pair(particle1, particle2));
isRealParticle[particle1] = true;
isRealParticle[particle2] = true;
}
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
}
numRealParticles = 0;
for (int i = 0; i < isRealParticle.size(); i++)
if (isRealParticle[i])
numRealParticles++;
int numExceptions = exceptionParamsVec.size();
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles = OpenCLArray::create<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams = OpenCLArray::create<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0)
exceptionParams->upload(exceptionParamsVec);
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
...
...
@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<mm_int2>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques.
...
...
@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce");
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigParams", "float", 4, sizeof(cl_float4), sigParams->getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("epsParams", "float", 2, sizeof(cl_float2), epsParams->getDeviceBuffer()));
// Record exceptions and exclusions.
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
if (epsilon != 0.0)
exceptions.push_back(i);
}
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
// Initialize the exceptions.
int numExceptions = exceptions.size();
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float2>(cl, numExceptions, "exceptionParams");
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float2");
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
torqueKernel = cl::Kernel(program, "applyTorques");
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this));
}
...
...
@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
...
...
@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
}
cl.executeKernel(framesKernel, cl.getNumAtoms());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
if (!useLong)
torqueKernel.setArg<cl_int>(index++, cl.getNumForceBuffers());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
}
cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles);
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles);
return 0.0;
}
...
...
@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int numExceptions = exceptionAtoms.size();
// Record the per-particle parameters.
...
...
@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<mm_float2> exceptionParamsVector(numExceptions);
vector<mm_float2> exceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
int atom1, atom2;
double sigma, epsilon;
force.getExceptionParameters(exceptions[
startIndex+i], atoms[i][0], atoms[i][1]
, sigma, epsilon);
exceptionParamsVec
tor
[i] = mm_float2((float) sigma, (float)
(4.0*
epsilon)
)
;
force.getExceptionParameters(exceptions[
i], atom1, atom2
, sigma, epsilon);
exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
}
exceptionParams->upload(exceptionParamsVec
tor
);
exceptionParams->upload(exceptionParamsVec);
}
cl.invalidateMolecules();
sortAtoms();
...
...
@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex();
vector<int> inverseOrder(order.size(), -1);
for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i];
if (isRealParticle[atom])
if (isRealParticle[atom]) {
inverseOrder[atom] = nextIndex;
particles[nextIndex++] = atom;
}
}
sortedParticles->upload(particles);
// Update the list of exception particles.
int numExceptions = exceptionAtoms.size();
if (numExceptions > 0) {
vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles->upload(exceptionParticlesVec);
}
// Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles);
...
...
platforms/opencl/src/kernels/gayBerne.cl
View file @
522e133e
...
...
@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sortedAtoms,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox
)
{
__global
real4*
restrict
blockBoundingBox
,
__global
int*
restrict
neighborBlockCount
)
{
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
...
...
@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
}
if
(
get_global_id
(
0
)
==
0
)
*neighborBlockCount
=
0
;
}
/**
*
Build
a
list
of
neighbors
for
each
atom.
*/
__kernel
void
findNeighbors
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sorted
Atom
s,
__global
const
real4*
restrict
posq
,
__global
real4*
restrict
sortedPos
,
__global
real4
*
restrict
blockCenter
,
__global
real4
*
restrict
b
lock
B
oun
dingBox
)
{
__kernel
void
findNeighbors
(
int
numAtoms,
int
maxNeighborBlocks,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
real4*
restrict
sorted
Po
s,
__global
real4*
restrict
blockCenter
,
__global
real4*
restrict
blockBoundingBox
,
__global
int
*
restrict
neighbors
,
__global
int2*
restrict
neighborIndex,
__global
int
*
restrict
neighborB
lock
C
oun
t
)
{
}
typedef
struct
{
...
...
@@ -281,10 +283,11 @@ __kernel void computeForce(
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
#
endif
int
numAtoms,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
int
numAtoms,
int
numExceptions,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex,
__global
const
int4*
restrict
exceptionParticles,
__global
const
float2*
restrict
exceptionParams
#
ifdef
USE_CUTOFF
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
...
...
@@ -306,14 +309,14 @@ __kernel void computeForce(
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
//
Skip
over
excluded
interactions.
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
atom2
)
{
int
index2
=
sortedAtoms[atom2]
;
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
index2
)
{
nextExclusion++
;
continue
;
}
//
Load
parameters
for
atom2.
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
...
...
@@ -361,5 +364,118 @@ __kernel void computeForce(
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
//
Now
compute
exceptions.
for
(
int
index
=
get_global_id
(
0
)
; index < numExceptions; index += get_global_size(0)) {
int4
atomIndices
=
exceptionParticles[index]
;
real2
params
=
exceptionParams[index]
;
int
index1
=
atomIndices.x,
index2
=
atomIndices.y
;
int
atom1
=
atomIndices.z,
atom2
=
atomIndices.w
;
AtomData
data1,
data2
;
loadAtomData
(
&data1,
atom1,
index1,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force1
=
0
,
force2
=
0
;
real3
torque1
=
0
,
torque2
=
0
;
real3
delta
=
data1.pos-data2.pos
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
computeOneInteraction
(
&data1,
&data2,
params.x,
params.y,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
/**
*
Convert
the
torques
to
forces
on
the
connected
particles.
*/
__kernel
void
applyTorques
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
torqueBuffers,
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
int
numBuffers,
#
endif
int
numParticles,
__global
const
real4*
restrict
posq,
__global
int2*
const
restrict
axisParticleIndices,
__global
const
int*
sortedParticles
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
for
(
int
sortedIndex
=
get_global_id
(
0
)
; sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
int
originalIndex
=
sortedParticles[sortedIndex]
;
real3
pos
=
posq[originalIndex].xyz
;
int2
axisParticles
=
axisParticleIndices[originalIndex]
;
if
(
axisParticles.x
!=
-1
)
{
//
Load
the
torque.
#
ifdef
SUPPORTS_64_BIT_ATOMICS
real
scale
=
1/
(
real
)
0x100000000
;
real3
torque
=
(
real3
)
(
scale*torqueBuffers[originalIndex],
scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS],
scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]
)
;
#
else
real3
torque
=
0
;
for
(
int
i
=
0
; i < numBuffers; i++)
torque
+=
torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz
;
#
endif
real3
force
=
0
,
xforce
=
0
,
yforce
=
0
;
//
Apply
a
force
to
the
x
particle.
real3
dx
=
posq[axisParticles.x].xyz-pos
;
real
dx2
=
dot
(
dx,
dx
)
;
real3
f
=
cross
(
torque,
dx
)
/dx2
;
xforce
+=
f
;
force
-=
f
;
if
(
axisParticles.y
!=
-1
)
{
//
Apply
a
force
to
the
y
particle.
This
is
based
on
the
component
of
the
torque
//
that
was
not
already
applied
to
the
x
particle.
real3
dy
=
posq[axisParticles.y].xyz-pos
;
real
dy2
=
dot
(
dy,
dy
)
;
real3
torque2
=
dx*dot
(
torque,
dx
)
/dx2
;
f
=
cross
(
torque2,
dy
)
/dy2
;
yforce
+=
f
;
force
-=
f
;
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[originalIndex],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS],
(
long
)
(
force.z*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x],
(
long
)
(
xforce.x*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS],
(
long
)
(
xforce.y*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS],
(
long
)
(
xforce.z*0x100000000
))
;
if
(
axisParticles.y
!=
-1
)
{
atom_add
(
&forceBuffers[axisParticles.y],
(
long
)
(
yforce.x*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS],
(
long
)
(
yforce.y*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS],
(
long
)
(
yforce.z*0x100000000
))
;
}
#
else
forceBuffers[originalIndex
+
warp*PADDED_NUM_ATOMS].xyz
+=
force.xyz
;
forceBuffers[axisParticles.x
+
warp*PADDED_NUM_ATOMS].xyz
+=
xforce.xyz
;
if
(
axisParticles.y
!=
-1
)
forceBuffers[axisParticles.y
+
warp*PADDED_NUM_ATOMS].xyz
+=
yforce.xyz
;
#
endif
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment