Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
522e133e
Commit
522e133e
authored
May 20, 2016
by
peastman
Browse files
Continuing OpenCL implementation of GayBerneForce
parent
69cab567
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
221 additions
and
78 deletions
+221
-78
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+9
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+86
-63
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+126
-10
No files found.
platforms/opencl/include/OpenCLKernels.h
View file @
522e133e
...
@@ -1080,9 +1080,10 @@ private:
...
@@ -1080,9 +1080,10 @@ private:
class
OpenCLCalcGayBerneForceKernel
:
public
CalcGayBerneForceKernel
{
class
OpenCLCalcGayBerneForceKernel
:
public
CalcGayBerneForceKernel
{
public:
public:
OpenCLCalcGayBerneForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGayBerneForceKernel
(
name
,
platform
),
cl
(
cl
),
OpenCLCalcGayBerneForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGayBerneForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
aMatrix
(
NULL
),
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
exceptionParticles
(
NULL
),
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedPos
(
NULL
),
exceptionParams
(
NULL
),
aMatrix
(
NULL
),
torque
(
NULL
)
{
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
neighbors
(
NULL
),
neighborIndex
(
NULL
),
neighborBlockCount
(
NULL
),
sortedPos
(
NULL
),
torque
(
NULL
)
{
}
}
~
OpenCLCalcGayBerneForceKernel
();
~
OpenCLCalcGayBerneForceKernel
();
/**
/**
...
@@ -1112,7 +1113,7 @@ private:
...
@@ -1112,7 +1113,7 @@ private:
void
sortAtoms
();
void
sortAtoms
();
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
int
numRealParticles
;
int
numRealParticles
,
maxNeighborBlocks
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
OpenCLArray
*
sortedParticles
;
OpenCLArray
*
sortedParticles
;
OpenCLArray
*
axisParticleIndices
;
OpenCLArray
*
axisParticleIndices
;
...
@@ -1128,12 +1129,15 @@ private:
...
@@ -1128,12 +1129,15 @@ private:
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
neighbors
;
OpenCLArray
*
neighborIndex
;
OpenCLArray
*
neighborBlockCount
;
OpenCLArray
*
sortedPos
;
OpenCLArray
*
sortedPos
;
OpenCLArray
*
torque
;
OpenCLArray
*
torque
;
std
::
vector
<
bool
>
isRealParticle
;
std
::
vector
<
bool
>
isRealParticle
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
excludedPairs
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
excludedPairs
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
,
torqueKernel
;
};
};
/**
/**
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
522e133e
...
@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
...
@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
delete blockCenter;
delete blockCenter;
if (blockBoundingBox != NULL)
if (blockBoundingBox != NULL)
delete blockBoundingBox;
delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL)
if (sortedPos != NULL)
delete sortedPos;
delete sortedPos;
if (torque != NULL)
if (torque != NULL)
...
@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms());
isRealParticle.resize(cl.getPaddedNumAtoms());
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
numRealParticles
=
0
;
for (int i = 0; i < numParticles; i++) {
for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle;
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
...
@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if
(
epsilon
!=
0.0
)
{
isRealParticle[i] = (epsilon != 0.0);
numRealParticles
++
;
isRealParticle
[
i
]
=
true
;
}
else
isRealParticle
[
i
]
=
false
;
exclusionList
[
i
].
push_back
(
i
);
}
}
sigParams->upload(sigParamsVector);
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector);
axisParticleIndices->upload(axisParticleVector);
// Record exceptions and exclusions.
vector<mm_float2> exceptionParamsVec;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (epsilon != 0.0) {
exceptionParamsVec.push_back(mm_float2((float) sigma, (float) epsilon));
exceptionAtoms.push_back(make_pair(particle1, particle2));
isRealParticle[particle1] = true;
isRealParticle[particle2] = true;
}
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
}
numRealParticles = 0;
for (int i = 0; i < isRealParticle.size(); i++)
if (isRealParticle[i])
numRealParticles++;
int numExceptions = exceptionParamsVec.size();
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles = OpenCLArray::create<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams = OpenCLArray::create<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0)
exceptionParams->upload(exceptionParamsVec);
// Create data structures used for the neighbor list.
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
int numAtomBlocks = (numRealParticles+31)/32;
...
@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<mm_int2>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques.
// Create arrays for accumulating torques.
...
@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
...
@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors");
neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce");
forceKernel = cl::Kernel(program, "computeForce");
torqueKernel = cl::Kernel(program, "applyTorques");
// Add the interaction to the default nonbonded kernel.
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
coulombLennardJones
,
defines
);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"sigParams"
,
"float"
,
4
,
sizeof
(
cl_float4
),
sigParams
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"epsParams"
,
"float"
,
2
,
sizeof
(
cl_float2
),
epsParams
->
getDeviceBuffer
()));
// Record exceptions and exclusions.
vector
<
int
>
exceptions
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
double
sigma
,
epsilon
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
sigma
,
epsilon
);
if
(
isRealParticle
[
particle1
]
&&
isRealParticle
[
particle2
])
excludedPairs
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
if
(
epsilon
!=
0.0
)
exceptions
.
push_back
(
i
);
}
exclusions
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
max
(
1
,
(
int
)
excludedPairs
.
size
()),
"exclusions"
);
exclusionStartIndex
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
numRealParticles
+
1
,
"exclusionStartIndex"
);
// Initialize the exceptions.
int
numExceptions
=
exceptions
.
size
();
if
(
numExceptions
>
0
)
{
exceptionAtoms
.
resize
(
numExceptions
);
vector
<
vector
<
int
>
>
atoms
(
numExceptions
,
vector
<
int
>
(
2
));
exceptionParams
=
OpenCLArray
::
create
<
mm_float2
>
(
cl
,
numExceptions
,
"exceptionParams"
);
vector
<
mm_float2
>
exceptionParamsVector
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
{
double
sigma
,
epsilon
;
force
.
getExceptionParameters
(
exceptions
[
i
],
atoms
[
i
][
0
],
atoms
[
i
][
1
],
sigma
,
epsilon
);
exceptionParamsVector
[
i
]
=
mm_float2
((
float
)
sigma
,
(
float
)
(
4.0
*
epsilon
));
exceptionAtoms
[
i
]
=
make_pair
(
atoms
[
i
][
0
],
atoms
[
i
][
1
]);
}
exceptionParams
->
upload
(
exceptionParamsVector
);
map
<
string
,
string
>
replacements
;
replacements
[
"PARAMS"
]
=
cl
.
getBondedUtilities
().
addArgument
(
exceptionParams
->
getDeviceBuffer
(),
"float2"
);
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this));
cl.addReorderListener(new ReorderListener(*this));
}
}
...
@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
...
@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
...
@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
}
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
cl
.
executeKernel
(
framesKernel
,
cl
.
getNumAtoms
());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
if (!useLong)
torqueKernel.setArg<cl_int>(index++, cl.getNumForceBuffers());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
}
cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) {
if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles);
}
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles);
return 0.0;
return 0.0;
}
}
...
@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
...
@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
else if (epsilon != 0.0)
else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
}
int
numContexts
=
cl
.
getPlatformData
().
contexts
.
size
();
int numExceptions = exceptionAtoms.size();
int
startIndex
=
cl
.
getContextIndex
()
*
exceptions
.
size
()
/
numContexts
;
int
endIndex
=
(
cl
.
getContextIndex
()
+
1
)
*
exceptions
.
size
()
/
numContexts
;
int
numExceptions
=
endIndex
-
startIndex
;
// Record the per-particle parameters.
// Record the per-particle parameters.
...
@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
...
@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
// Record the exceptions.
// Record the exceptions.
if (numExceptions > 0) {
if (numExceptions > 0) {
vector
<
vector
<
int
>
>
atoms
(
numExceptions
,
vector
<
int
>
(
2
));
vector<mm_float2> exceptionParamsVec(numExceptions);
vector
<
mm_float2
>
exceptionParamsVector
(
numExceptions
);
for (int i = 0; i < numExceptions; i++) {
for (int i = 0; i < numExceptions; i++) {
int atom1, atom2;
double sigma, epsilon;
double sigma, epsilon;
force
.
getExceptionParameters
(
exceptions
[
startIndex
+
i
],
atoms
[
i
][
0
],
atoms
[
i
][
1
]
,
sigma
,
epsilon
);
force.getExceptionParameters(exceptions[
i], atom1, atom2
, sigma, epsilon);
exceptionParamsVec
tor
[
i
]
=
mm_float2
((
float
)
sigma
,
(
float
)
(
4.0
*
epsilon
)
)
;
exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
}
}
exceptionParams
->
upload
(
exceptionParamsVec
tor
);
exceptionParams->upload(exceptionParamsVec);
}
}
cl.invalidateMolecules();
cl.invalidateMolecules();
sortAtoms();
sortAtoms();
...
@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
...
@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
int nextIndex = 0;
int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex();
const vector<int>& order = cl.getAtomIndex();
vector<int> inverseOrder(order.size(), -1);
for (int i = 0; i < cl.getNumAtoms(); i++) {
for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i];
int atom = order[i];
if
(
isRealParticle
[
atom
])
if (isRealParticle[atom]) {
inverseOrder[atom] = nextIndex;
particles[nextIndex++] = atom;
particles[nextIndex++] = atom;
}
}
}
sortedParticles->upload(particles);
sortedParticles->upload(particles);
// Update the list of exception particles.
int numExceptions = exceptionAtoms.size();
if (numExceptions > 0) {
vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles->upload(exceptionParticlesVec);
}
// Rebuild the list of exclusions.
// Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles);
vector<vector<int> > excludedAtoms(numRealParticles);
...
...
platforms/opencl/src/kernels/gayBerne.cl
View file @
522e133e
...
@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res
...
@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res
*/
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sortedAtoms,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
const
int*
sortedAtoms,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox
)
{
__global
real4*
restrict
blockBoundingBox
,
__global
int*
restrict
neighborBlockCount
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
while
(
base
<
numAtoms
)
{
...
@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
base
=
index*TILE_SIZE
;
}
}
if
(
get_global_id
(
0
)
==
0
)
*neighborBlockCount
=
0
;
}
}
/**
/**
*
Build
a
list
of
neighbors
for
each
atom.
*
Build
a
list
of
neighbors
for
each
atom.
*/
*/
__kernel
void
findNeighbors
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__kernel
void
findNeighbors
(
int
numAtoms,
int
maxNeighborBlocks,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sorted
Atom
s,
__global
const
real4*
restrict
posq
,
__global
real4*
restrict
sortedPos
,
__global
real4
*
restrict
blockCenter
,
__global
real4*
restrict
sorted
Po
s,
__global
real4*
restrict
blockCenter
,
__global
real4*
restrict
blockBoundingBox
,
__global
int
*
restrict
neighbors
,
__global
real4
*
restrict
b
lock
B
oun
dingBox
)
{
__global
int2*
restrict
neighborIndex,
__global
int
*
restrict
neighborB
lock
C
oun
t
)
{
}
}
typedef
struct
{
typedef
struct
{
...
@@ -281,10 +283,11 @@ __kernel void computeForce(
...
@@ -281,10 +283,11 @@ __kernel void computeForce(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
#
endif
#
endif
int
numAtoms,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
int
numAtoms,
int
numExceptions,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
aMatrix,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
__global
const
int*
restrict
exclusions,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
__global
const
int*
restrict
exclusionStartIndex
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex,
__global
const
int4*
restrict
exceptionParticles,
__global
const
float2*
restrict
exceptionParams
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
...
@@ -306,14 +309,14 @@ __kernel void computeForce(
...
@@ -306,14 +309,14 @@ __kernel void computeForce(
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
//
Skip
over
excluded
interactions.
//
Skip
over
excluded
interactions.
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
atom2
)
{
int
index2
=
sortedAtoms[atom2]
;
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
index2
)
{
nextExclusion++
;
nextExclusion++
;
continue
;
continue
;
}
}
//
Load
parameters
for
atom2.
//
Load
parameters
for
atom2.
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
real3
force2
=
0.0f
;
...
@@ -361,5 +364,118 @@ __kernel void computeForce(
...
@@ -361,5 +364,118 @@ __kernel void computeForce(
torqueBuffers[offset].xyz
+=
torque1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
#
endif
}
}
//
Now
compute
exceptions.
for
(
int
index
=
get_global_id
(
0
)
; index < numExceptions; index += get_global_size(0)) {
int4
atomIndices
=
exceptionParticles[index]
;
real2
params
=
exceptionParams[index]
;
int
index1
=
atomIndices.x,
index2
=
atomIndices.y
;
int
atom1
=
atomIndices.z,
atom2
=
atomIndices.w
;
AtomData
data1,
data2
;
loadAtomData
(
&data1,
atom1,
index1,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force1
=
0
,
force2
=
0
;
real3
torque1
=
0
,
torque2
=
0
;
real3
delta
=
data1.pos-data2.pos
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
computeOneInteraction
(
&data1,
&data2,
params.x,
params.y,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
}
/**
*
Convert
the
torques
to
forces
on
the
connected
particles.
*/
__kernel
void
applyTorques
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
torqueBuffers,
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
int
numBuffers,
#
endif
int
numParticles,
__global
const
real4*
restrict
posq,
__global
int2*
const
restrict
axisParticleIndices,
__global
const
int*
sortedParticles
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
for
(
int
sortedIndex
=
get_global_id
(
0
)
; sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
int
originalIndex
=
sortedParticles[sortedIndex]
;
real3
pos
=
posq[originalIndex].xyz
;
int2
axisParticles
=
axisParticleIndices[originalIndex]
;
if
(
axisParticles.x
!=
-1
)
{
//
Load
the
torque.
#
ifdef
SUPPORTS_64_BIT_ATOMICS
real
scale
=
1/
(
real
)
0x100000000
;
real3
torque
=
(
real3
)
(
scale*torqueBuffers[originalIndex],
scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS],
scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]
)
;
#
else
real3
torque
=
0
;
for
(
int
i
=
0
; i < numBuffers; i++)
torque
+=
torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz
;
#
endif
real3
force
=
0
,
xforce
=
0
,
yforce
=
0
;
//
Apply
a
force
to
the
x
particle.
real3
dx
=
posq[axisParticles.x].xyz-pos
;
real
dx2
=
dot
(
dx,
dx
)
;
real3
f
=
cross
(
torque,
dx
)
/dx2
;
xforce
+=
f
;
force
-=
f
;
if
(
axisParticles.y
!=
-1
)
{
//
Apply
a
force
to
the
y
particle.
This
is
based
on
the
component
of
the
torque
//
that
was
not
already
applied
to
the
x
particle.
real3
dy
=
posq[axisParticles.y].xyz-pos
;
real
dy2
=
dot
(
dy,
dy
)
;
real3
torque2
=
dx*dot
(
torque,
dx
)
/dx2
;
f
=
cross
(
torque2,
dy
)
/dy2
;
yforce
+=
f
;
force
-=
f
;
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[originalIndex],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS],
(
long
)
(
force.z*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x],
(
long
)
(
xforce.x*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS],
(
long
)
(
xforce.y*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS],
(
long
)
(
xforce.z*0x100000000
))
;
if
(
axisParticles.y
!=
-1
)
{
atom_add
(
&forceBuffers[axisParticles.y],
(
long
)
(
yforce.x*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS],
(
long
)
(
yforce.y*0x100000000
))
;
atom_add
(
&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS],
(
long
)
(
yforce.z*0x100000000
))
;
}
#
else
forceBuffers[originalIndex
+
warp*PADDED_NUM_ATOMS].xyz
+=
force.xyz
;
forceBuffers[axisParticles.x
+
warp*PADDED_NUM_ATOMS].xyz
+=
xforce.xyz
;
if
(
axisParticles.y
!=
-1
)
forceBuffers[axisParticles.y
+
warp*PADDED_NUM_ATOMS].xyz
+=
yforce.xyz
;
#
endif
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment