Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ae686364
Unverified
Commit
ae686364
authored
Aug 17, 2022
by
Peter Eastman
Committed by
GitHub
Aug 17, 2022
Browse files
Improved support for devices without 64 bit atomics (#3737)
parent
48664a1f
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
117 additions
and
1037 deletions
+117
-1037
platforms/common/include/openmm/common/CommonKernels.h
platforms/common/include/openmm/common/CommonKernels.h
+0
-2
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+49
-198
platforms/common/src/IntegrationUtilities.cpp
platforms/common/src/IntegrationUtilities.cpp
+0
-2
platforms/common/src/kernels/customGBEnergyN2.cc
platforms/common/src/kernels/customGBEnergyN2.cc
+0
-35
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
+0
-45
platforms/common/src/kernels/customGBEnergyPerParticle.cc
platforms/common/src/kernels/customGBEnergyPerParticle.cc
+0
-7
platforms/common/src/kernels/customGBGradientChainRule.cc
platforms/common/src/kernels/customGBGradientChainRule.cc
+0
-12
platforms/common/src/kernels/customGBValueN2.cc
platforms/common/src/kernels/customGBValueN2.cc
+0
-26
platforms/common/src/kernels/customGBValueN2_cpu.cc
platforms/common/src/kernels/customGBValueN2_cpu.cc
+0
-34
platforms/common/src/kernels/customGBValuePerParticle.cc
platforms/common/src/kernels/customGBValuePerParticle.cc
+0
-11
platforms/common/src/kernels/customHbondForce.cc
platforms/common/src/kernels/customHbondForce.cc
+0
-52
platforms/common/src/kernels/customNonbondedGroups.cc
platforms/common/src/kernels/customNonbondedGroups.cc
+0
-38
platforms/common/src/kernels/gbsaObc.cc
platforms/common/src/kernels/gbsaObc.cc
+0
-46
platforms/common/src/kernels/gbsaObc2.cc
platforms/common/src/kernels/gbsaObc2.cc
+0
-5
platforms/common/src/kernels/gbsaObcReductions.cc
platforms/common/src/kernels/gbsaObcReductions.cc
+1
-26
platforms/common/src/kernels/gbsaObc_cpu.cc
platforms/common/src/kernels/gbsaObc_cpu.cc
+0
-82
platforms/common/src/kernels/pme.cc
platforms/common/src/kernels/pme.cc
+0
-232
platforms/opencl/include/OpenCLBondedUtilities.h
platforms/opencl/include/OpenCLBondedUtilities.h
+3
-11
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+2
-2
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+62
-171
No files found.
platforms/common/include/openmm/common/CommonKernels.h
View file @
ae686364
...
...
@@ -786,8 +786,6 @@ private:
ComputeArray
globals
;
ComputeArray
donors
;
ComputeArray
acceptors
;
ComputeArray
donorBufferIndices
;
ComputeArray
acceptorBufferIndices
;
ComputeArray
donorExclusions
;
ComputeArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
...
...
platforms/common/src/CommonKernels.cpp
View file @
ae686364
...
...
@@ -1483,8 +1483,6 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
numBonds
=
force
.
getNumBonds
();
if
(
numBonds
==
0
)
return
;
if (!cc.getSupports64BitGlobalAtomics())
throw OpenMMException("CustomCentroidBondForce requires a device that supports 64 bit atomic operations");
info
=
new
ForceInfo
(
force
);
cc
.
addForce
(
info
);
...
...
@@ -2380,8 +2378,7 @@ double CommonCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
if
(
interactionGroupData
.
isInitialized
())
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
bool useLong = cc.getSupports64BitGlobalAtomics();
interactionGroupKernel->addArg((useLong ? cc.getLongForceBuffer() : cc.getForceBuffers()));
interactionGroupKernel
->
addArg
(
cc
.
getLongForceBuffer
());
interactionGroupKernel
->
addArg
(
cc
.
getEnergyBuffer
());
interactionGroupKernel
->
addArg
(
cc
.
getPosq
());
interactionGroupKernel
->
addArg
((
useNeighborList
?
filteredGroupData
:
interactionGroupData
));
...
...
@@ -2501,15 +2498,8 @@ void CommonCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
charges
.
initialize
(
cc
,
cc
.
getPaddedNumAtoms
(),
elementSize
,
"gbsaObcCharges"
);
bornRadii
.
initialize
(
cc
,
cc
.
getPaddedNumAtoms
(),
elementSize
,
"bornRadii"
);
obcChain
.
initialize
(
cc
,
cc
.
getPaddedNumAtoms
(),
elementSize
,
"obcChain"
);
if (cc.getSupports64BitGlobalAtomics()) {
bornSum.initialize<long long>(cc, cc.getPaddedNumAtoms(), "bornSum");
bornForce.initialize<long long>(cc, cc.getPaddedNumAtoms(), "bornForce");
}
else {
int bufferSize = cc.getPaddedNumAtoms()*nb.getNumForceBuffers();
bornSum.initialize(cc, bufferSize, elementSize, "bornSum");
bornForce.initialize(cc, bufferSize, elementSize, "bornForce");
}
bornSum
.
initialize
<
long
long
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"bornSum"
);
bornForce
.
initialize
<
long
long
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"bornForce"
);
cc
.
addAutoclearBuffer
(
bornSum
);
cc
.
addAutoclearBuffer
(
bornForce
);
vector
<
double
>
chargeVec
(
cc
.
getPaddedNumAtoms
());
...
...
@@ -2541,7 +2531,7 @@ void CommonCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
nb
.
addInteraction
(
useCutoff
,
usePeriodic
,
false
,
cutoff
,
vector
<
vector
<
int
>
>
(),
source
,
force
.
getForceGroup
());
nb
.
addParameter
(
ComputeParameterInfo
(
charges
,
prefix
+
"charge"
,
"float"
,
1
));
nb
.
addParameter
(
ComputeParameterInfo
(
params
,
prefix
+
"obcParams"
,
"float"
,
2
));
nb.addParameter(ComputeParameterInfo(bornForce, prefix+"bornForce",
cc.getSupports64BitGlobalAtomics() ? "mm_long" : "real
", 1));
nb
.
addParameter
(
ComputeParameterInfo
(
bornForce
,
prefix
+
"bornForce"
,
"mm_long
"
,
1
));
info
=
new
ForceInfo
(
force
);
cc
.
addForce
(
info
);
}
...
...
@@ -2580,7 +2570,6 @@ double CommonCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
else
file
=
CommonKernelSources
::
gbsaObc
;
ComputeProgram
program
=
cc
.
compileProgram
(
file
,
defines
);
bool useLong = cc.getSupports64BitGlobalAtomics();
computeBornSumKernel
=
program
->
createKernel
(
"computeBornSum"
);
computeBornSumKernel
->
addArg
(
bornSum
);
computeBornSumKernel
->
addArg
(
cc
.
getPosq
());
...
...
@@ -2600,7 +2589,7 @@ double CommonCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
->
addArg
(
numAtomBlocks
*
(
numAtomBlocks
+
1
)
/
2
);
computeBornSumKernel
->
addArg
(
nb
.
getExclusionTiles
());
force1Kernel
=
program
->
createKernel
(
"computeGBSAForce1"
);
force1Kernel->addArg(
useLong ?
cc.getLongForceBuffer()
: cc.getForceBuffers()
);
force1Kernel
->
addArg
(
cc
.
getLongForceBuffer
());
force1Kernel
->
addArg
(
bornForce
);
force1Kernel
->
addArg
(
cc
.
getEnergyBuffer
());
force1Kernel
->
addArg
(
cc
.
getPosq
());
...
...
@@ -2626,19 +2615,11 @@ double CommonCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornSumKernel
->
addArg
(
0.8
f
);
reduceBornSumKernel
->
addArg
(
4.85
f
);
reduceBornSumKernel
->
addArg
(
bornSum
);
if (!useLong) {
reduceBornSumKernel->addArg(cc.getPaddedNumAtoms());
reduceBornSumKernel->addArg(cc.getForceBuffers().getSize()/cc.getPaddedNumAtoms());
}
reduceBornSumKernel
->
addArg
(
params
);
reduceBornSumKernel
->
addArg
(
bornRadii
);
reduceBornSumKernel
->
addArg
(
obcChain
);
reduceBornForceKernel
=
program
->
createKernel
(
"reduceBornForce"
);
reduceBornForceKernel
->
addArg
(
bornForce
);
if (!useLong) {
reduceBornForceKernel->addArg(cc.getPaddedNumAtoms());
reduceBornForceKernel->addArg(cc.getForceBuffers().getSize()/cc.getPaddedNumAtoms());
}
reduceBornForceKernel
->
addArg
(
cc
.
getEnergyBuffer
());
reduceBornForceKernel
->
addArg
(
params
);
reduceBornForceKernel
->
addArg
(
bornRadii
);
...
...
@@ -2872,28 +2853,17 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
energyParamDerivExpressions
[
i
].
push_back
(
ex
.
differentiate
(
force
.
getEnergyParameterDerivativeName
(
j
)).
optimize
());
}
bool
deviceIsCpu
=
cc
.
getIsCPU
();
bool useLong = cc.getSupports64BitGlobalAtomics();
int
elementSize
=
(
cc
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
if (useLong) {
valueBuffers.initialize<long long>(cc, cc.getPaddedNumAtoms(), "customGBValueBuffers");
longEnergyDerivs.initialize<long long>(cc, numComputedValues*cc.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new ComputeParameterSet(cc, numComputedValues, cc.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
}
else {
int bufferSize = cc.getPaddedNumAtoms()*nb.getNumForceBuffers();
valueBuffers.initialize(cc, bufferSize, elementSize, "customGBValueBuffers");
energyDerivs = new ComputeParameterSet(cc, numComputedValues, bufferSize, "customGBEnergyDerivatives", true);
}
valueBuffers
.
initialize
<
long
long
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"customGBValueBuffers"
);
longEnergyDerivs
.
initialize
<
long
long
>
(
cc
,
numComputedValues
*
cc
.
getPaddedNumAtoms
(),
"customGBLongEnergyDerivatives"
);
energyDerivs
=
new
ComputeParameterSet
(
cc
,
numComputedValues
,
cc
.
getPaddedNumAtoms
(),
"customGBEnergyDerivatives"
,
true
);
cc
.
addAutoclearBuffer
(
valueBuffers
);
energyDerivChain
=
new
ComputeParameterSet
(
cc
,
numComputedValues
,
cc
.
getPaddedNumAtoms
(),
"customGBEnergyDerivativeChain"
,
true
);
needEnergyParamDerivs
=
(
force
.
getNumEnergyParameterDerivatives
()
>
0
);
dValue0dParam
.
resize
(
force
.
getNumEnergyParameterDerivatives
());
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
dValuedParam
.
push_back
(
new
ComputeParameterSet
(
cc
,
numComputedValues
,
cc
.
getPaddedNumAtoms
(),
"dValuedParam"
,
true
,
cc
.
getUseDoublePrecision
()));
if (useLong)
dValue0dParam[i].initialize<long long>(cc, cc.getPaddedNumAtoms(), "dValue0dParam");
else
dValue0dParam[i].initialize(cc, cc.getPaddedNumAtoms()*nb.getNumForceBuffers(), elementSize, "dValue0dParam");
dValue0dParam
[
i
].
initialize
<
long
long
>
(
cc
,
cc
.
getPaddedNumAtoms
(),
"dValue0dParam"
);
cc
.
addAutoclearBuffer
(
dValue0dParam
[
i
]);
string
name
=
force
.
getEnergyParameterDerivativeName
(
i
);
cc
.
addEnergyParameterDerivative
(
name
);
...
...
@@ -2960,10 +2930,7 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
string
derivName
=
"dValue0dParam"
+
cc
.
intToString
(
i
+
1
);
if (useLong)
extraArgs << ", GLOBAL mm_ulong* RESTRICT global_" << derivName;
else
extraArgs << ", GLOBAL real* RESTRICT global_" << derivName;
extraArgs
<<
", GLOBAL mm_ulong* RESTRICT global_"
<<
derivName
;
atomParams
<<
"LOCAL real local_"
<<
derivName
<<
"[LOCAL_BUFFER_SIZE];
\n
"
;
loadLocal2
<<
"local_"
<<
derivName
<<
"[localAtomIndex] = 0;
\n
"
;
load1
<<
"real "
<<
derivName
<<
" = 0;
\n
"
;
...
...
@@ -2975,20 +2942,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
tempDerivs2
<<
"local_"
<<
derivName
<<
"[j] += temp_"
<<
derivName
<<
"_2;
\n
"
;
else
tempDerivs2
<<
"local_"
<<
derivName
<<
"[tbx+tj] += temp_"
<<
derivName
<<
"_2;
\n
"
;
if (useLong) {
storeDeriv1 << "ATOMIC_ADD(&global_" << derivName << "[offset1], (mm_ulong) realToFixedPoint(" << derivName << "));\n";
if (deviceIsCpu)
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[tgx]));\n";
else
storeDeriv2 << "ATOMIC_ADD(&global_" << derivName << "[offset2], (mm_ulong) realToFixedPoint(local_" << derivName << "[LOCAL_ID]));\n";
}
else {
storeDeriv1 << "global_" << derivName << "[offset1] += " << derivName << ";\n";
if (deviceIsCpu)
storeDeriv2 << "global_" << derivName << "[offset2] += local_" << derivName << "[tgx];\n";
else
storeDeriv2 << "global_" << derivName << "[offset2] += local_" << derivName << "[LOCAL_ID];\n";
}
storeDeriv1
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset1], (mm_ulong) realToFixedPoint("
<<
derivName
<<
"));
\n
"
;
if
(
deviceIsCpu
)
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong) realToFixedPoint(local_"
<<
derivName
<<
"[tgx]));
\n
"
;
else
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong) realToFixedPoint(local_"
<<
derivName
<<
"[LOCAL_ID]));
\n
"
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
...
...
@@ -3039,16 +2997,8 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
for
(
int
i
=
0
;
i
<
force
.
getNumEnergyParameterDerivatives
();
i
++
)
{
string
variableName
=
"dValuedParam_0_"
+
cc
.
intToString
(
i
);
if (useLong) {
extraArgs << ", GLOBAL const mm_long* RESTRICT dValue0dParam" << i;
deriv0 << "real " << variableName << " = RECIP((real) 0x100000000)*dValue0dParam" << i << "[index];\n";
}
else {
extraArgs << ", GLOBAL const real* RESTRICT dValue0dParam" << i;
deriv0 << "real " << variableName << " = dValue0dParam" << i << "[index];\n";
deriv0 << "for (int i = index+bufferSize; i < totalSize; i += bufferSize)\n";
deriv0 << " " << variableName << " += dValue0dParam" << i << "[i];\n";
}
extraArgs
<<
", GLOBAL const mm_long* RESTRICT dValue0dParam"
<<
i
;
deriv0
<<
"real "
<<
variableName
<<
" = RECIP((real) 0x100000000)*dValue0dParam"
<<
i
<<
"[index];
\n
"
;
for
(
int
j
=
0
;
j
<
dValuedParam
[
i
]
->
getParameterInfos
().
size
();
j
++
)
extraArgs
<<
", GLOBAL real* RESTRICT global_dValuedParam_"
<<
j
<<
"_"
<<
i
;
deriv0
<<
"global_dValuedParam_0_"
<<
i
<<
"[index] = dValuedParam_0_"
<<
i
<<
";
\n
"
;
...
...
@@ -3129,21 +3079,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
map
<
string
,
Lepton
::
ParsedExpression
>
n2EnergyExpressions
;
n2EnergyExpressions
[
"tempEnergy += "
]
=
Lepton
::
Parser
::
parse
(
expression
,
functions
).
optimize
();
n2EnergyExpressions
[
"dEdR += "
]
=
Lepton
::
Parser
::
parse
(
expression
,
functions
).
differentiate
(
"r"
).
optimize
();
if (useLong) {
for (int j = 0; j < numComputedValues; j++) {
if (needChainForValue[j]) {
string index = cc.intToString(j+1);
n2EnergyExpressions["/*"+cc.intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+cc.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
}
}
}
else {
for (int j = 0; j < numComputedValues; j++) {
if (needChainForValue[j]) {
n2EnergyExpressions["/*"+cc.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+cc.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
}
for
(
int
j
=
0
;
j
<
numComputedValues
;
j
++
)
{
if
(
needChainForValue
[
j
])
{
string
index
=
cc
.
intToString
(
j
+
1
);
n2EnergyExpressions
[
"/*"
+
cc
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_1 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
];
n2EnergyExpressions
[
"/*"
+
cc
.
intToString
(
i
+
1
)
+
"*/ deriv"
+
index
+
"_2 += "
]
=
energyDerivExpressions
[
i
][
2
*
j
+
1
];
}
}
for
(
int
j
=
0
;
j
<
force
.
getNumEnergyParameterDerivatives
();
j
++
)
...
...
@@ -3188,32 +3128,16 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesValue
[
i
]
=
true
;
}
}
if (useLong) {
extraArgs << ", GLOBAL mm_ulong* RESTRICT derivBuffers";
for (int i = 0; i < numComputedValues; i++) {
string index = cc.intToString(i+1);
atomParams << "LOCAL real local_deriv" << index << "[LOCAL_BUFFER_SIZE];\n";
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << "real deriv" << index << "_1 = 0;\n";
load2 << "real deriv" << index << "_2 = 0;\n";
recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n";
storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n";
storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n";
}
}
else {
for (int i = 0; i < (int) energyDerivs->getParameterInfos().size(); i++) {
const ComputeParameterInfo& buffer = energyDerivs->getParameterInfos()[i];
string index = cc.intToString(i+1);
extraArgs << ", GLOBAL " << buffer.getType() << "* RESTRICT derivBuffers" << index;
atomParams << "LOCAL " << buffer.getType() << " local_deriv" << index << "[LOCAL_BUFFER_SIZE];\n";
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n";
load2 << buffer.getType() << " deriv" << index << "_2 = 0.0f;\n";
recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n";
storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n";
storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n";
}
extraArgs
<<
", GLOBAL mm_ulong* RESTRICT derivBuffers"
;
for
(
int
i
=
0
;
i
<
numComputedValues
;
i
++
)
{
string
index
=
cc
.
intToString
(
i
+
1
);
atomParams
<<
"LOCAL real local_deriv"
<<
index
<<
"[LOCAL_BUFFER_SIZE];
\n
"
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
"real deriv"
<<
index
<<
"_1 = 0;
\n
"
;
load2
<<
"real deriv"
<<
index
<<
"_2 = 0;
\n
"
;
recordDeriv
<<
"local_deriv"
<<
index
<<
"[atom2] += deriv"
<<
index
<<
"_2;
\n
"
;
storeDerivs1
<<
"STORE_DERIVATIVE_1("
<<
index
<<
")
\n
"
;
storeDerivs2
<<
"STORE_DERIVATIVE_2("
<<
index
<<
")
\n
"
;
}
if
(
needEnergyParamDerivs
)
{
extraArgs
<<
", GLOBAL mixed* RESTRICT energyParamDerivs"
;
...
...
@@ -3285,16 +3209,10 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
string
index
=
cc
.
intToString
(
i
+
1
);
extraArgs
<<
", GLOBAL "
<<
buffer
.
getType
()
<<
"* RESTRICT derivChain"
<<
index
;
}
if (useLong) {
extraArgs << ", GLOBAL const mm_long* RESTRICT derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
reduce << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
" = RECIP((real) 0x100000000)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cc.intToString(i) << "];\n";
}
else {
for (int i = 0; i < (int) energyDerivs->getParameterInfos().size(); i++)
reduce << "REDUCE_VALUE(derivBuffers" << cc.intToString(i+1) << ", " << energyDerivs->getParameterInfos()[i].getType() << ")\n";
}
extraArgs
<<
", GLOBAL const mm_long* RESTRICT derivBuffersIn"
;
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
reduce
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
" = RECIP((real) 0x100000000)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
cc
.
intToString
(
i
)
<<
"];
\n
"
;
if
(
needEnergyParamDerivs
)
{
extraArgs
<<
", GLOBAL mixed* RESTRICT energyParamDerivs"
;
const
vector
<
string
>&
allParamDerivNames
=
cc
.
getEnergyParamDerivNames
();
...
...
@@ -3353,13 +3271,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
string
index
=
cc
.
intToString
(
i
+
1
);
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
if (useLong) {
compute << "forceBuffers[index] += realToFixedPoint(force.x);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += realToFixedPoint(force.z);\n";
}
else
compute << "forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);\n";
compute
<<
"forceBuffers[index] += realToFixedPoint(force.x);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] += realToFixedPoint(force.y);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] += realToFixedPoint(force.z);
\n
"
;
for
(
int
i
=
1
;
i
<
numComputedValues
;
i
++
)
{
compute
<<
"real totalDeriv"
<<
i
<<
" = dV"
<<
i
<<
"dV0"
;
for
(
int
j
=
1
;
j
<
i
;
j
++
)
...
...
@@ -3548,12 +3462,7 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
info
=
new
ForceInfo
(
force
);
cc
.
addForce
(
info
);
if (useLong)
cc.addAutoclearBuffer(longEnergyDerivs);
else {
for (auto& buffer : energyDerivs->getParameterInfos())
cc.addAutoclearBuffer(buffer.getArray());
}
cc
.
addAutoclearBuffer
(
longEnergyDerivs
);
}
double
CommonCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -3600,7 +3509,6 @@ double CommonCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
maxTiles
=
(
nb
.
getUseCutoff
()
?
nb
.
getInteractingTiles
().
getSize
()
:
0
);
int
numAtomBlocks
=
cc
.
getPaddedNumAtoms
()
/
32
;
bool useLong = cc.getSupports64BitGlobalAtomics();
pairValueKernel
->
addArg
(
cc
.
getPosq
());
pairValueKernel
->
addArg
(
cc
.
getNonbondedUtilities
().
getExclusions
());
pairValueKernel
->
addArg
(
cc
.
getNonbondedUtilities
().
getExclusionTiles
());
...
...
@@ -3631,10 +3539,6 @@ double CommonCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
->
addArg
(
function
);
perParticleValueKernel
->
addArg
(
cc
.
getPosq
());
perParticleValueKernel
->
addArg
(
valueBuffers
);
if (!useLong) {
perParticleValueKernel->addArg(cc.getPaddedNumAtoms());
perParticleValueKernel->addArg(cc.getForceBuffers().getSize()/cc.getPaddedNumAtoms());
}
if
(
globals
.
isInitialized
())
perParticleValueKernel
->
addArg
(
globals
);
for
(
auto
&
buffer
:
params
->
getParameterInfos
())
...
...
@@ -3648,7 +3552,7 @@ double CommonCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
for
(
auto
&
function
:
tabulatedFunctionArrays
)
perParticleValueKernel
->
addArg
(
function
);
pairEnergyKernel->addArg(
useLong ?
cc.getLongForceBuffer()
: cc.getForceBuffers()
);
pairEnergyKernel
->
addArg
(
cc
.
getLongForceBuffer
());
pairEnergyKernel
->
addArg
(
cc
.
getEnergyBuffer
());
pairEnergyKernel
->
addArg
(
cc
.
getPosq
());
pairEnergyKernel
->
addArg
(
cc
.
getNonbondedUtilities
().
getExclusions
());
...
...
@@ -3680,24 +3584,14 @@ double CommonCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel
->
addArg
(
buffer
.
getArray
());
}
}
if (useLong)
pairEnergyKernel->addArg(longEnergyDerivs);
else
for (auto& buffer : energyDerivs->getParameterInfos())
pairEnergyKernel->addArg(buffer.getArray());
pairEnergyKernel
->
addArg
(
longEnergyDerivs
);
if
(
needEnergyParamDerivs
)
pairEnergyKernel
->
addArg
(
cc
.
getEnergyParamDerivBuffer
());
for
(
auto
&
function
:
tabulatedFunctionArrays
)
pairEnergyKernel
->
addArg
(
function
);
perParticleEnergyKernel
->
addArg
(
cc
.
getEnergyBuffer
());
perParticleEnergyKernel
->
addArg
(
cc
.
getPosq
());
if (cc.getSupports64BitGlobalAtomics())
perParticleEnergyKernel->addArg(cc.getLongForceBuffer());
else {
perParticleEnergyKernel->addArg(cc.getForceBuffers());
perParticleEnergyKernel->addArg(cc.getPaddedNumAtoms());
perParticleEnergyKernel->addArg(cc.getForceBuffers().getSize()/cc.getPaddedNumAtoms());
}
perParticleEnergyKernel
->
addArg
(
cc
.
getLongForceBuffer
());
if
(
globals
.
isInitialized
())
perParticleEnergyKernel
->
addArg
(
globals
);
for
(
auto
&
buffer
:
params
->
getParameterInfos
())
...
...
@@ -3708,15 +3602,14 @@ double CommonCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
perParticleEnergyKernel
->
addArg
(
buffer
.
getArray
());
for
(
auto
&
buffer
:
energyDerivChain
->
getParameterInfos
())
perParticleEnergyKernel
->
addArg
(
buffer
.
getArray
());
if (useLong)
perParticleEnergyKernel->addArg(longEnergyDerivs);
perParticleEnergyKernel
->
addArg
(
longEnergyDerivs
);
if
(
needEnergyParamDerivs
)
perParticleEnergyKernel
->
addArg
(
cc
.
getEnergyParamDerivBuffer
());
for
(
auto
&
function
:
tabulatedFunctionArrays
)
perParticleEnergyKernel
->
addArg
(
function
);
if
(
needParameterGradient
||
needEnergyParamDerivs
)
{
gradientChainRuleKernel
->
addArg
(
cc
.
getPosq
());
gradientChainRuleKernel->addArg(
useLong ?
cc.getLongForceBuffer()
: cc.getForceBuffers()
);
gradientChainRuleKernel
->
addArg
(
cc
.
getLongForceBuffer
());
if
(
globals
.
isInitialized
())
gradientChainRuleKernel
->
addArg
(
globals
);
for
(
auto
&
buffer
:
params
->
getParameterInfos
())
...
...
@@ -3937,33 +3830,6 @@ void CommonCalcCustomHbondForceKernel::initialize(const System& system, const Cu
}
acceptors
.
upload
(
acceptorVector
);
acceptorParams
->
setParameterValues
(
acceptorParamVector
);
// Select an output buffer index for each donor and acceptor.
if (!cc.getSupports64BitGlobalAtomics()) {
donorBufferIndices.initialize<mm_int4>(cc, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices.initialize<mm_int4>(cc, numAcceptors, "customHbondAcceptorBuffers");
vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<int> donorBufferCounter(numParticles, 0);
for (int i = 0; i < numDonors; i++)
donorBufferVector[i] = mm_int4(donorVector[i].x > -1 ? donorBufferCounter[donorVector[i].x]++ : 0,
donorVector[i].y > -1 ? donorBufferCounter[donorVector[i].y]++ : 0,
donorVector[i].z > -1 ? donorBufferCounter[donorVector[i].z]++ : 0, 0);
vector<int> acceptorBufferCounter(numParticles, 0);
for (int i = 0; i < numAcceptors; i++)
acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
donorBufferIndices.upload(donorBufferVector);
acceptorBufferIndices.upload(acceptorBufferVector);
int maxBuffers = 1;
for (int i : donorBufferCounter)
maxBuffers = max(maxBuffers, i);
for (int i : acceptorBufferCounter)
maxBuffers = max(maxBuffers, i);
cc.requestForceBuffers(maxBuffers);
}
info
=
new
ForceInfo
(
force
);
cc
.
addForce
(
info
);
...
...
@@ -4243,12 +4109,7 @@ double CommonCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
}
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
if (cc.getSupports64BitGlobalAtomics())
donorKernel->addArg(cc.getLongForceBuffer());
else {
donorKernel->addArg(cc.getForceBuffers());
donorKernel->addArg(donorBufferIndices);
}
donorKernel
->
addArg
(
cc
.
getLongForceBuffer
());
donorKernel
->
addArg
(
cc
.
getEnergyBuffer
());
donorKernel
->
addArg
(
cc
.
getPosq
());
donorKernel
->
addArg
(
donorExclusions
);
...
...
@@ -4264,12 +4125,7 @@ double CommonCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel
->
addArg
(
parameter
.
getArray
());
for
(
auto
&
function
:
tabulatedFunctionArrays
)
donorKernel
->
addArg
(
function
);
if (cc.getSupports64BitGlobalAtomics())
acceptorKernel->addArg(cc.getLongForceBuffer());
else {
acceptorKernel->addArg(cc.getForceBuffers());
acceptorKernel->addArg(acceptorBufferIndices);
}
acceptorKernel
->
addArg
(
cc
.
getLongForceBuffer
());
acceptorKernel
->
addArg
(
cc
.
getEnergyBuffer
());
acceptorKernel
->
addArg
(
cc
.
getPosq
());
acceptorKernel
->
addArg
(
acceptorExclusions
);
...
...
@@ -4286,9 +4142,9 @@ double CommonCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
for
(
auto
&
function
:
tabulatedFunctionArrays
)
acceptorKernel
->
addArg
(
function
);
}
setPeriodicBoxArgs(cc, donorKernel,
cc.getSupports64BitGlobalAtomics() ? 6 : 7
);
setPeriodicBoxArgs
(
cc
,
donorKernel
,
6
);
donorKernel
->
execute
(
max
(
numDonors
,
numAcceptors
),
64
);
setPeriodicBoxArgs(cc, acceptorKernel,
cc.getSupports64BitGlobalAtomics() ? 6 : 7
);
setPeriodicBoxArgs
(
cc
,
acceptorKernel
,
6
);
acceptorKernel
->
execute
(
max
(
numDonors
,
numAcceptors
),
64
);
return
0.0
;
}
...
...
@@ -4391,8 +4247,6 @@ CommonCalcCustomManyParticleForceKernel::~CommonCalcCustomManyParticleForceKerne
void
CommonCalcCustomManyParticleForceKernel
::
initialize
(
const
System
&
system
,
const
CustomManyParticleForce
&
force
)
{
ContextSelector
selector
(
cc
);
if (!cc.getSupports64BitGlobalAtomics())
throw OpenMMException("CustomManyParticleForce requires a device that supports 64 bit atomic operations");
int
numParticles
=
force
.
getNumParticles
();
int
particlesPerSet
=
force
.
getNumParticlesPerSet
();
bool
centralParticleMode
=
(
force
.
getPermutationMode
()
==
CustomManyParticleForce
::
UniqueCentralParticle
);
...
...
@@ -4933,9 +4787,6 @@ private:
};
void
CommonCalcGayBerneForceKernel
::
initialize
(
const
System
&
system
,
const
GayBerneForce
&
force
)
{
if (!cc.getSupports64BitGlobalAtomics())
throw OpenMMException("GayBerneForce requires a device that supports 64 bit atomic operations");
// Initialize interactions.
ContextSelector
selector
(
cc
);
...
...
platforms/common/src/IntegrationUtilities.cpp
View file @
ae686364
...
...
@@ -528,8 +528,6 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
if
(
atomCounts
[
i
]
>
1
)
hasOverlappingVsites
=
true
;
if
(
hasOverlappingVsites
&&
!
context
.
getSupports64BitGlobalAtomics
())
throw
OpenMMException
(
"This device does not support 64 bit atomics. Cannot have multiple virtual sites that depend on the same atom."
);
// Create the kernels used by this class.
...
...
platforms/common/src/kernels/customGBEnergyN2.cc
View file @
ae686364
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[LOCAL_ID]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
#endif
/**
* Compute a force based on pair interactions.
*/
KERNEL
void
computeN2Energy
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
...
...
@@ -160,7 +151,6 @@ KERNEL void computeN2Energy(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
...
@@ -173,18 +163,6 @@ KERNEL void computeN2Energy(
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
STORE_DERIVATIVES_2
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
offset1
;
forceBuffers
[
offset1
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
if
(
x
!=
y
)
{
offset
=
offset2
;
forceBuffers
[
offset2
]
+=
(
real4
)
(
local_force
[
LOCAL_ID
].
x
,
local_force
[
LOCAL_ID
].
y
,
local_force
[
LOCAL_ID
].
z
,
0.0
f
);
STORE_DERIVATIVES_2
}
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -363,7 +341,6 @@ KERNEL void computeN2Energy(
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
...
...
@@ -376,18 +353,6 @@ KERNEL void computeN2Energy(
offset
=
atom2
;
STORE_DERIVATIVES_2
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
].
xyz
+=
force
.
xyz
;
unsigned
int
offset
=
offset1
;
STORE_DERIVATIVES_1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
local_force
[
LOCAL_ID
].
x
,
local_force
[
LOCAL_ID
].
y
,
local_force
[
LOCAL_ID
].
z
,
0.0
f
);
offset
=
offset2
;
STORE_DERIVATIVES_2
}
#endif
}
pos
++
;
}
...
...
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
View file @
ae686364
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
#endif
/**
* Compute a force based on pair interactions.
*/
KERNEL
void
computeN2Energy
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
...
...
@@ -100,17 +91,11 @@ KERNEL void computeN2Energy(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
else
{
...
...
@@ -174,33 +159,21 @@ KERNEL void computeN2Energy(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
STORE_DERIVATIVES_2
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
local_force
[
tgx
].
xyz
;
STORE_DERIVATIVES_2
#endif
}
}
}
...
...
@@ -316,17 +289,11 @@ KERNEL void computeN2Energy(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
else
...
...
@@ -375,17 +342,11 @@ KERNEL void computeN2Energy(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
...
...
@@ -398,17 +359,11 @@ KERNEL void computeN2Energy(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
unsigned
int
offset
=
atom2
;
STORE_DERIVATIVES_2
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
local_force
[
tgx
].
xyz
;
STORE_DERIVATIVES_2
#endif
}
}
}
...
...
platforms/common/src/kernels/customGBEnergyPerParticle.cc
View file @
ae686364
...
...
@@ -10,20 +10,13 @@
*/
KERNEL
void
computePerParticleEnergy
(
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
int
bufferSize
,
int
numBuffers
#endif
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
INIT_PARAM_DERIVS
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Reduce the derivatives
#ifndef SUPPORTS_64_BIT_ATOMICS
int
totalSize
=
bufferSize
*
numBuffers
;
#endif
REDUCE_DERIVATIVES
// Now calculate the per-particle energy terms.
...
...
platforms/common/src/kernels/customGBGradientChainRule.cc
View file @
ae686364
...
...
@@ -3,29 +3,17 @@
*/
KERNEL
void
computeGradientChainRuleTerms
(
GLOBAL
const
real4
*
RESTRICT
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
#endif
PARAMETER_ARGUMENTS
)
{
INIT_PARAM_DERIVS
const
real
scale
=
RECIP
((
real
)
0x100000000
);
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
real4
pos
=
posq
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
real3
force
=
make_real3
(
scale
*
forceBuffers
[
index
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]);
#else
real3
force
=
trimTo3
(
forceBuffers
[
index
]);
#endif
COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers
[
index
]
=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
force
.
z
);
#else
forceBuffers
[
index
]
=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
#endif
}
SAVE_PARAM_DERIVS
}
platforms/common/src/kernels/customGBValueN2.cc
View file @
ae686364
...
...
@@ -3,11 +3,7 @@
*/
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
#else
GLOBAL
real
*
RESTRICT
global_value
,
#endif
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
...
...
@@ -137,7 +133,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
...
...
@@ -146,16 +141,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
global_value
[
offset2
]
+=
local_value
[
LOCAL_ID
];
STORE_PARAM_DERIVS2
}
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -317,7 +302,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
...
...
@@ -326,16 +310,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
LOCAL_ID
];
STORE_PARAM_DERIVS2
}
#endif
}
pos
++
;
}
...
...
platforms/common/src/kernels/customGBValueN2_cpu.cc
View file @
ae686364
...
...
@@ -3,11 +3,7 @@
*/
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
#else
GLOBAL
real
*
RESTRICT
global_value
,
#endif
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
...
...
@@ -84,13 +80,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
}
}
...
...
@@ -146,26 +137,16 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
}
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
#endif
STORE_PARAM_DERIVS2
}
}
...
...
@@ -273,13 +254,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
}
}
...
...
@@ -322,13 +298,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
}
}
...
...
@@ -342,13 +313,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
atom2
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
#endif
STORE_PARAM_DERIVS2
}
}
...
...
platforms/common/src/kernels/customGBValuePerParticle.cc
View file @
ae686364
...
...
@@ -3,23 +3,12 @@
*/
KERNEL
void
computePerParticleValues
(
GLOBAL
real4
*
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
valueBuffers
#else
GLOBAL
real
*
valueBuffers
,
int
bufferSize
,
int
numBuffers
#endif
PARAMETER_ARGUMENTS
)
{
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Reduce the pairwise value
#ifdef SUPPORTS_64_BIT_ATOMICS
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
#else
int
totalSize
=
bufferSize
*
numBuffers
;
real
sum
=
valueBuffers
[
index
];
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
sum
+=
valueBuffers
[
i
];
#endif
REDUCE_PARAM0_DERIV
// Now calculate other values
...
...
platforms/common/src/kernels/customHbondForce.cc
View file @
ae686364
...
...
@@ -44,11 +44,7 @@ inline DEVICE real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
*/
KERNEL
void
computeDonorForces
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
force
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
const
int4
*
RESTRICT
donorBufferIndices
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
...
@@ -114,7 +110,6 @@ KERNEL void computeDonorForces(
// Write results
if
(
donorIndex
<
NUM_DONORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
...
...
@@ -133,27 +128,6 @@ KERNEL void computeDonorForces(
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
}
#else
int4
bufferIndices
=
donorBufferIndices
[
donorIndex
];
if
(
atoms
.
x
>
-
1
)
{
unsigned
int
offset
=
atoms
.
x
+
bufferIndices
.
x
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f1
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
y
>
-
1
)
{
unsigned
int
offset
=
atoms
.
y
+
bufferIndices
.
y
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f2
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
z
>
-
1
)
{
unsigned
int
offset
=
atoms
.
z
+
bufferIndices
.
z
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f3
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
#endif
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
...
...
@@ -162,11 +136,7 @@ KERNEL void computeDonorForces(
* Compute forces on acceptors.
*/
KERNEL
void
computeAcceptorForces
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
force
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
const
int4
*
RESTRICT
acceptorBufferIndices
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
...
@@ -231,7 +201,6 @@ KERNEL void computeAcceptorForces(
// Write results
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
...
...
@@ -250,27 +219,6 @@ KERNEL void computeAcceptorForces(
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
}
#else
int4
bufferIndices
=
acceptorBufferIndices
[
acceptorIndex
];
if
(
atoms
.
x
>
-
1
)
{
unsigned
int
offset
=
atoms
.
x
+
bufferIndices
.
x
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f1
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
y
>
-
1
)
{
unsigned
int
offset
=
atoms
.
y
+
bufferIndices
.
y
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f2
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
z
>
-
1
)
{
unsigned
int
offset
=
atoms
.
z
+
bufferIndices
.
z
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f3
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
#endif
}
}
}
platforms/common/src/kernels/customNonbondedGroups.cc
View file @
ae686364
...
...
@@ -35,38 +35,8 @@ DEVICE int reduceMax(int val, LOCAL_ARG int* temp) {
#endif
}
#ifndef SUPPORTS_64_BIT_ATOMICS
/**
* This function is used on devices that don't support 64 bit atomics. Multiple threads within
* a single tile might have computed forces on the same atom. This loops over them and makes sure
* that only one thread updates the force on any given atom.
*/
void
writeForces
(
GLOBAL
real4
*
forceBuffers
,
LOCAL
AtomData
*
localData
,
int
atomIndex
)
{
localData
[
LOCAL_ID
].
x
=
atomIndex
;
SYNC_WARPS
;
real4
forceSum
=
make_real4
(
0
);
int
start
=
(
LOCAL_ID
/
TILE_SIZE
)
*
TILE_SIZE
;
int
end
=
start
+
32
;
bool
isFirst
=
true
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
if
(
localData
[
i
].
x
==
atomIndex
)
{
forceSum
+=
(
real4
)
(
localData
[
i
].
fx
,
localData
[
i
].
fy
,
localData
[
i
].
fz
,
0
);
isFirst
&=
(
i
>=
LOCAL_ID
);
}
const
unsigned
int
warp
=
GLOBAL_ID
/
TILE_SIZE
;
unsigned
int
offset
=
atomIndex
+
warp
*
PADDED_NUM_ATOMS
;
if
(
isFirst
)
forceBuffers
[
offset
]
+=
forceSum
;
SYNC_WARPS
;
}
#endif
KERNEL
void
computeInteractionGroups
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
groupData
,
GLOBAL
const
int
*
RESTRICT
numGroupTiles
,
int
useNeighborList
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
...
@@ -139,7 +109,6 @@ KERNEL void computeInteractionGroups(
}
SYNC_WARPS
;
}
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
...
@@ -149,13 +118,6 @@ KERNEL void computeInteractionGroups(
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
SYNC_WARPS
;
#else
writeForces
(
forceBuffers
,
localData
,
atom2
);
localData
[
LOCAL_ID
].
fx
=
force
.
x
;
localData
[
LOCAL_ID
].
fy
=
force
.
y
;
localData
[
LOCAL_ID
].
fz
=
force
.
z
;
writeForces
(
forceBuffers
,
localData
,
atom1
);
#endif
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
SAVE_DERIVATIVES
...
...
platforms/common/src/kernels/gbsaObc.cc
View file @
ae686364
...
...
@@ -17,11 +17,7 @@ typedef struct ALIGN {
* Compute the Born sum.
*/
KERNEL
void
computeBornSum
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_bornSum
,
#else
GLOBAL
real
*
RESTRICT
global_bornSum
,
#endif
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
...
@@ -152,20 +148,12 @@ KERNEL void computeBornSum(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset1
]
+=
bornSum
;
if
(
x
!=
y
)
global_bornSum
[
offset2
]
+=
localData
[
LOCAL_ID
].
bornSum
;
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -357,17 +345,9 @@ KERNEL void computeBornSum(
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset1
]
+=
bornSum
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
global_bornSum
[
offset2
]
+=
localData
[
LOCAL_ID
].
bornSum
;
#endif
}
pos
++
;
}
...
...
@@ -385,11 +365,7 @@ typedef struct ALIGN {
*/
KERNEL
void
computeGBSAForce1
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
global_bornForce
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
real
*
RESTRICT
global_bornForce
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
#ifdef USE_CUTOFF
...
...
@@ -538,7 +514,6 @@ KERNEL void computeGBSAForce1(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
...
@@ -551,16 +526,6 @@ KERNEL void computeGBSAForce1(
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset1
]
+=
force
.
w
;
if
(
x
!=
y
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
localData
[
LOCAL_ID
].
fx
,
localData
[
LOCAL_ID
].
fy
,
localData
[
LOCAL_ID
].
fz
,
0.0
f
);
global_bornForce
[
offset2
]
+=
localData
[
LOCAL_ID
].
fw
;
}
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -763,7 +728,6 @@ KERNEL void computeGBSAForce1(
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
...
...
@@ -774,16 +738,6 @@ KERNEL void computeGBSAForce1(
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset1
]
+=
force
.
w
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
localData
[
LOCAL_ID
].
fx
,
localData
[
LOCAL_ID
].
fy
,
localData
[
LOCAL_ID
].
fz
,
0.0
f
);
global_bornForce
[
offset2
]
+=
localData
[
LOCAL_ID
].
fw
;
}
#endif
}
pos
++
;
}
...
...
platforms/common/src/kernels/gbsaObc2.cc
View file @
ae686364
...
...
@@ -16,13 +16,8 @@
real
t2I
=
(
l_ij2I
-
u_ij2I
);
real
term1
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS2
.
y
*
OBC_PARAMS2
.
y
*
invRSquaredOver4
)
*
t2J
+
t1J
*
invRSquaredOver4
)
*
invR
;
real
term2
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS1
.
y
*
OBC_PARAMS1
.
y
*
invRSquaredOver4
)
*
t2I
+
t1I
*
invRSquaredOver4
)
*
invR
;
#ifdef SUPPORTS_64_BIT_ATOMICS
real
tempdEdR
=
(
OBC_PARAMS1
.
x
<
rScaledRadiusJ
?
BORN_FORCE1
*
term1
/
0x100000000
:
0
);
tempdEdR
+=
(
OBC_PARAMS2
.
x
<
rScaledRadiusI
?
BORN_FORCE2
*
term2
/
0x100000000
:
0
);
#else
real
tempdEdR
=
(
OBC_PARAMS1
.
x
<
rScaledRadiusJ
?
BORN_FORCE1
*
term1
:
(
real
)
0
);
tempdEdR
+=
(
OBC_PARAMS2
.
x
<
rScaledRadiusI
?
BORN_FORCE2
*
term2
:
(
real
)
0
);
#endif
#ifdef USE_CUTOFF
unsigned
int
includeInteraction
=
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
&&
r2
<
CUTOFF_SQUARED
);
#else
...
...
platforms/common/src/kernels/gbsaObcReductions.cc
View file @
ae686364
...
...
@@ -6,23 +6,12 @@
*/
KERNEL
void
reduceBornSum
(
float
alpha
,
float
beta
,
float
gamma
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
const
mm_long
*
RESTRICT
bornSum
,
#else
GLOBAL
const
real
*
RESTRICT
bornSum
,
int
bufferSize
,
int
numBuffers
,
#endif
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
real
*
RESTRICT
bornRadii
,
GLOBAL
real
*
RESTRICT
obcChain
)
{
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Get summed Born data
#ifdef SUPPORTS_64_BIT_ATOMICS
real
sum
=
RECIP
((
real
)
0x100000000
)
*
bornSum
[
index
];
#else
real
sum
=
bornSum
[
index
];
int
totalSize
=
bufferSize
*
numBuffers
;
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
sum
+=
bornSum
[
i
];
#endif
// Now calculate Born radius and OBC term.
...
...
@@ -45,24 +34,14 @@ KERNEL void reduceBornSum(float alpha, float beta, float gamma,
*/
KERNEL
void
reduceBornForce
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
bornForce
,
#else
GLOBAL
real
*
bornForce
,
int
bufferSize
,
int
numBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
const
real
*
RESTRICT
bornRadii
,
GLOBAL
const
real
*
RESTRICT
obcChain
)
{
mixed
energy
=
0
;
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Get summed Born force
#ifdef SUPPORTS_64_BIT_ATOMICS
real
force
=
RECIP
((
real
)
0x100000000
)
*
bornForce
[
index
];
#else
real
force
=
bornForce
[
index
];
int
totalSize
=
bufferSize
*
numBuffers
;
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
force
+=
bornForce
[
i
];
#endif
// Now calculate the actual force
float
offsetRadius
=
params
[
index
].
x
;
...
...
@@ -73,11 +52,7 @@ KERNEL void reduceBornForce(
force
+=
saTerm
/
bornRadius
;
energy
+=
saTerm
;
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
bornForce
[
index
]
=
realToFixedPoint
(
force
);
#else
bornForce
[
index
]
=
force
;
#endif
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
/-
6
;
}
platforms/common/src/kernels/gbsaObc_cpu.cc
View file @
ae686364
...
...
@@ -9,11 +9,7 @@ typedef struct {
* Compute the Born sum.
*/
KERNEL
void
computeBornSum
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
global_bornSum
,
#else
GLOBAL
real
*
RESTRICT
global_bornSum
,
#endif
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
...
@@ -87,12 +83,7 @@ KERNEL void computeBornSum(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
else
{
...
...
@@ -149,24 +140,14 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
#endif
}
}
}
...
...
@@ -296,12 +277,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
else
...
...
@@ -359,12 +335,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
...
...
@@ -377,12 +348,7 @@ KERNEL void computeBornSum(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
#endif
}
}
}
...
...
@@ -402,11 +368,7 @@ typedef struct {
*/
KERNEL
void
computeGBSAForce1
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
,
GLOBAL
mm_long
*
RESTRICT
global_bornForce
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
real
*
RESTRICT
global_bornForce
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
#ifdef USE_CUTOFF
...
...
@@ -490,16 +452,10 @@ KERNEL void computeGBSAForce1(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
else
{
...
...
@@ -561,36 +517,20 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
f
.
x
+=
localData
[
tgx
].
fx
;
f
.
y
+=
localData
[
tgx
].
fy
;
f
.
z
+=
localData
[
tgx
].
fz
;
forceBuffers
[
offset
]
=
f
;
global_bornForce
[
offset
]
+=
localData
[
tgx
].
fw
;
#endif
}
}
}
...
...
@@ -722,16 +662,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
else
...
...
@@ -790,16 +724,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
...
...
@@ -812,20 +740,10 @@ KERNEL void computeGBSAForce1(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
f
.
x
+=
localData
[
tgx
].
fx
;
f
.
y
+=
localData
[
tgx
].
fy
;
f
.
z
+=
localData
[
tgx
].
fz
;
forceBuffers
[
offset
]
=
f
;
global_bornForce
[
offset
]
+=
localData
[
tgx
].
fw
;
#endif
}
}
}
...
...
platforms/common/src/kernels/pme.cc
View file @
ae686364
KERNEL
void
findAtomGridIndex
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
recipBoxVecX
,
real4
recipBoxVecY
,
real4
recipBoxVecZ
#ifndef SUPPORTS_64_BIT_ATOMICS
,
GLOBAL
real4
*
RESTRICT
pmeBsplineTheta
,
LOCAL
real4
*
RESTRICT
bsplinesCache
,
#ifdef CHARGE_FROM_SIGEPS
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
GLOBAL
const
real
*
RESTRICT
charges
#endif
#endif
)
{
// Compute the index of the grid point each atom is associated with.
...
...
@@ -25,42 +17,9 @@ KERNEL void findAtomGridIndex(GLOBAL const real4* RESTRICT posq, GLOBAL int2* RE
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
pmeAtomGridIndex
[
atom
]
=
make_int2
(
atom
,
gridIndex
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
gridIndex
.
y
*
GRID_SIZE_Z
+
gridIndex
.
z
);
#ifndef SUPPORTS_64_BIT_ATOMICS
// Compute B-splines here for use in the charge spreading kernel.
const
real4
scale
=
1
/
(
real
)
(
PME_ORDER
-
1
);
LOCAL
real4
*
data
=
&
bsplinesCache
[
LOCAL_ID
*
PME_ORDER
];
real4
dr
=
(
real4
)
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
,
0.0
f
);
data
[
PME_ORDER
-
1
]
=
0.0
f
;
data
[
1
]
=
dr
;
data
[
0
]
=
1.0
f
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1.0
f
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
make_real4
(
k
))
*
data
[
j
-
k
-
2
]
+
(
-
dr
+
make_real4
(
j
-
k
))
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
make_real4
(
j
))
*
data
[
PME_ORDER
-
j
-
2
]
+
(
-
dr
+
make_real4
(
PME_ORDER
-
j
))
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
#ifdef CHARGE_FROM_SIGEPS
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
CHARGE
;
#endif
data
[
j
].
w
=
charge
;
// Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta
[
atom
+
j
*
NUM_ATOMS
]
=
data
[
j
];
}
#endif
}
}
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#if defined(USE_HIP) && !defined(AMD_RDNA)
LAUNCH_BOUNDS_EXACT
(
128
,
1
)
#endif
...
...
@@ -206,197 +165,6 @@ KERNEL void finishSpreadCharge(
#endif
}
}
#elif defined(DEVICE_IS_CPU)
KERNEL
void
gridSpreadCharge
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
real
*
RESTRICT
pmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
recipBoxVecX
,
real4
recipBoxVecY
,
real4
recipBoxVecZ
,
#ifdef CHARGE_FROM_SIGEPS
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
GLOBAL
const
real
*
RESTRICT
charges
#endif
)
{
const
int
firstx
=
GLOBAL_ID
*
GRID_SIZE_X
/
GLOBAL_SIZE
;
const
int
lastx
=
(
GLOBAL_ID
+
1
)
*
GRID_SIZE_X
/
GLOBAL_SIZE
;
if
(
firstx
==
lastx
)
return
;
const
real4
scale
=
1
/
(
real
)
(
PME_ORDER
-
1
);
real4
data
[
PME_ORDER
];
// Process the atoms in spatially sorted order. This improves efficiency when writing
// the grid values.
for
(
int
i
=
0
;
i
<
NUM_ATOMS
;
i
++
)
{
int
atom
=
i
;
real4
pos
=
posq
[
atom
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int4
gridIndex
=
(
int4
)
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
,
0
);
// Spread the charge from this atom onto each grid point.
#ifdef CHARGE_FROM_SIGEPS
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
(
CHARGE
)
*
EPSILON_FACTOR
;
#endif
if
(
charge
==
0
)
continue
;
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xindex
=
gridIndex
.
x
+
ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
);
if
(
xindex
<
firstx
||
xindex
>=
lastx
)
continue
;
if
(
!
hasComputedThetas
)
{
hasComputedThetas
=
true
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real4
dr
=
(
real4
)
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
,
0.0
f
);
data
[
PME_ORDER
-
1
]
=
0.0
f
;
data
[
1
]
=
dr
;
data
[
0
]
=
1.0
f
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1.0
f
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
(
real4
)
k
)
*
data
[
j
-
k
-
2
]
+
(
-
dr
+
(
real4
)
(
j
-
k
))
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
(
real4
)
j
)
*
data
[
PME_ORDER
-
j
-
2
]
+
(
-
dr
+
(
real4
)
(
PME_ORDER
-
j
))
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
yindex
=
gridIndex
.
y
+
iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
);
for
(
int
iz
=
0
;
iz
<
PME_ORDER
;
iz
++
)
{
int
zindex
=
gridIndex
.
z
+
iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
xindex
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
yindex
*
GRID_SIZE_Z
+
zindex
;
pmeGrid
[
index
]
+=
charge
*
data
[
ix
].
x
*
data
[
iy
].
y
*
data
[
iz
].
z
;
}
}
}
}
}
#else
/**
* For each grid point, find the range of sorted atoms associated with that point.
*/
KERNEL
void
findAtomRangeForGrid
(
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
int
*
RESTRICT
pmeAtomRange
,
GLOBAL
const
real4
*
RESTRICT
posq
)
{
int
start
=
(
NUM_ATOMS
*
GLOBAL_ID
)
/
GLOBAL_SIZE
;
int
end
=
(
NUM_ATOMS
*
(
GLOBAL_ID
+
1
))
/
GLOBAL_SIZE
;
int
last
=
(
start
==
0
?
-
1
:
pmeAtomGridIndex
[
start
-
1
].
y
);
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
gridIndex
=
atomData
.
y
;
if
(
gridIndex
!=
last
)
{
for
(
int
j
=
last
+
1
;
j
<=
gridIndex
;
++
j
)
pmeAtomRange
[
j
]
=
i
;
last
=
gridIndex
;
}
}
// Fill in values beyond the last atom.
if
(
GLOBAL_ID
==
GLOBAL_SIZE
-
1
)
{
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
for
(
int
j
=
last
+
1
;
j
<=
gridSize
;
++
j
)
pmeAtomRange
[
j
]
=
NUM_ATOMS
;
}
}
/**
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel.
*/
KERNEL
void
recordZIndex
(
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
const
real4
*
RESTRICT
posq
,
real4
periodicBoxSize
,
real4
recipBoxVecZ
)
{
int
start
=
(
NUM_ATOMS
*
GLOBAL_ID
)
/
GLOBAL_SIZE
;
int
end
=
(
NUM_ATOMS
*
(
GLOBAL_ID
+
1
))
/
GLOBAL_SIZE
;
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
real
posz
=
posq
[
pmeAtomGridIndex
[
i
].
x
].
z
;
posz
-=
floor
(
posz
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
int
z
=
((
int
)
((
posz
*
recipBoxVecZ
.
z
)
*
GRID_SIZE_Z
))
%
GRID_SIZE_Z
;
pmeAtomGridIndex
[
i
].
y
=
z
;
}
}
KERNEL
void
gridSpreadCharge
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
real
*
RESTRICT
pmeGrid
,
GLOBAL
const
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
const
int
*
RESTRICT
pmeAtomRange
,
GLOBAL
const
real4
*
RESTRICT
pmeBsplineTheta
#ifdef CHARGE_FROM_SIGEPS
,
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
,
GLOBAL
const
real
*
RESTRICT
charges
#endif
)
{
unsigned
int
numGridPoints
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
for
(
int
gridIndex
=
GLOBAL_ID
;
gridIndex
<
numGridPoints
;
gridIndex
+=
GLOBAL_SIZE
)
{
// Compute the charge on a grid point.
int4
gridPoint
;
gridPoint
.
x
=
gridIndex
/
(
GRID_SIZE_Y
*
GRID_SIZE_Z
);
int
remainder
=
gridIndex
-
gridPoint
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
gridPoint
.
y
=
remainder
/
GRID_SIZE_Z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
GRID_SIZE_Z
;
real
result
=
0.0
f
;
// Loop over all atoms that affect this grid point.
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
++
ix
)
{
int
x
=
gridPoint
.
x
-
ix
+
(
gridPoint
.
x
>=
ix
?
0
:
GRID_SIZE_X
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
++
iy
)
{
int
y
=
gridPoint
.
y
-
iy
+
(
gridPoint
.
y
>=
iy
?
0
:
GRID_SIZE_Y
);
int
z1
=
gridPoint
.
z
-
PME_ORDER
+
1
;
z1
+=
(
z1
>=
0
?
0
:
GRID_SIZE_Z
);
int
z2
=
(
z1
<
gridPoint
.
z
?
gridPoint
.
z
:
GRID_SIZE_Z
-
1
);
int
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z1
;
int
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z2
;
int
firstAtom
=
pmeAtomRange
[
gridIndex1
];
int
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
real
atomCharge
=
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
w
;
result
+=
atomCharge
*
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
x
*
pmeBsplineTheta
[
atomIndex
+
iy
*
NUM_ATOMS
].
y
*
pmeBsplineTheta
[
atomIndex
+
iz
*
NUM_ATOMS
].
z
;
}
if
(
z1
>
gridPoint
.
z
)
{
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
;
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
gridPoint
.
z
;
firstAtom
=
pmeAtomRange
[
gridIndex1
];
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
real
atomCharge
=
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
w
;
result
+=
atomCharge
*
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
x
*
pmeBsplineTheta
[
atomIndex
+
iy
*
NUM_ATOMS
].
y
*
pmeBsplineTheta
[
atomIndex
+
iz
*
NUM_ATOMS
].
z
;
}
}
}
}
pmeGrid
[
gridIndex
]
=
result
*
EPSILON_FACTOR
;
}
}
#endif
KERNEL
void
reciprocalConvolution
(
GLOBAL
real2
*
RESTRICT
pmeGrid
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliX
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliY
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliZ
,
...
...
platforms/opencl/include/OpenCLBondedUtilities.h
View file @
ae686364
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-20
19
Stanford University and the Authors. *
* Portions copyright (c) 2011-20
22
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -133,12 +133,6 @@ public:
* Initialize this object in preparation for a simulation.
*/
void
initialize
(
const
System
&
system
);
/**
* Get the number of force buffers required for bonded forces.
*/
int
getNumForceBuffers
()
{
return
numForceBuffers
;
}
/**
* Compute the bonded interactions.
*
...
...
@@ -148,19 +142,17 @@ public:
private:
std
::
string
createForceSource
(
int
forceIndex
,
int
numBonds
,
int
numAtoms
,
int
group
,
const
std
::
string
&
computeForce
);
OpenCLContext
&
context
;
std
::
vector
<
cl
::
Kernel
>
kernel
s
;
cl
::
Kernel
kernel
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>
forceAtoms
;
std
::
vector
<
int
>
indexWidth
;
std
::
vector
<
std
::
string
>
forceSource
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
std
::
vector
<
int
>
>
forceSets
;
std
::
vector
<
cl
::
Memory
*>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
OpenCLArray
>
atomIndices
;
std
::
vector
<
OpenCLArray
>
bufferIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
int
maxBonds
,
allGroups
;
bool
hasInitializedKernels
;
};
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
ae686364
...
...
@@ -125,7 +125,7 @@ public:
* Get the number of force buffers required for nonbonded forces.
*/
int
getNumForceBuffers
()
const
{
return
numForceBuffers
;
return
1
;
}
/**
* Get the number of energy buffers required for nonbonded forces.
...
...
@@ -331,7 +331,7 @@ private:
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
double
lastCutoff
;
bool
useCutoff
,
usePeriodic
,
deviceIsCpu
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
numForceBuffers
,
startTileIndex
,
startBlockIndex
,
numBlocks
,
maxExclusions
,
numForceThreadBlocks
;
int
startTileIndex
,
startBlockIndex
,
numBlocks
,
maxExclusions
,
numForceThreadBlocks
;
int
forceThreadBlockSize
,
interactingBlocksThreadBlockSize
,
groupFlags
;
unsigned
int
tilesAfterReorder
;
long
long
numTiles
;
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
ae686364
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-20
19
Stanford University and the Authors. *
* Portions copyright (c) 2011-20
22
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -34,7 +34,7 @@
using
namespace
OpenMM
;
using
namespace
std
;
OpenCLBondedUtilities
::
OpenCLBondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
OpenCLBondedUtilities
::
OpenCLBondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
}
void
OpenCLBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
...
...
@@ -85,11 +85,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
if
(
numForces
==
0
)
return
;
// Build the lists of atom
indices and buffer
indices.
// Build the lists of atom indices.
vector
<
vector
<
cl_uint
>
>
bufferVec
(
numForces
);
vector
<
vector
<
int
>
>
bufferCounter
(
numForces
,
vector
<
int
>
(
system
.
getNumParticles
(),
0
));
vector
<
int
>
numBuffers
(
numForces
,
0
);
atomIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
...
...
@@ -102,126 +99,40 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
atomIndices
[
i
].
initialize
<
cl_uint
>
(
context
,
indexVec
.
size
(),
"bondedIndices"
);
atomIndices
[
i
].
upload
(
indexVec
);
bufferVec
[
i
].
resize
(
width
*
numBonds
,
0
);
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
bufferVec
[
i
][
bond
*
width
+
atom
]
=
bufferCounter
[
i
][
forceAtoms
[
i
][
bond
][
atom
]]
++
;
}
for
(
int
j
=
0
;
j
<
(
int
)
bufferCounter
[
i
].
size
();
j
++
)
numBuffers
[
i
]
=
max
(
numBuffers
[
i
],
bufferCounter
[
i
][
j
]);
}
// For efficiency, we want to merge multiple forces into a single kernel - but only if that
// won't increase the number of force buffers.
if
(
context
.
getSupports64BitGlobalAtomics
())
{
// Put all the forces in the same set.
numForceBuffers
=
1
;
forceSets
.
push_back
(
vector
<
int
>
());
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
forceSets
[
0
].
push_back
(
i
);
}
else
{
// Figure out how many force buffers will be required.
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
numForceBuffers
=
max
(
numForceBuffers
,
numBuffers
[
i
]);
int
bufferLimit
=
max
(
numForceBuffers
,
(
int
)
context
.
getPlatformData
().
contexts
.
size
());
if
(
context
.
getNonbondedUtilities
().
getHasInteractions
())
bufferLimit
=
max
(
bufferLimit
,
context
.
getNonbondedUtilities
().
getNumForceBuffers
());
// Figure out sets of forces that can be merged.
vector
<
int
>
unmerged
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
unmerged
[
i
]
=
i
;
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
for
(
int
j
=
i
-
1
;
j
>=
0
;
j
--
)
{
if
(
numBuffers
[
unmerged
[
j
]]
<=
numBuffers
[
unmerged
[
j
+
1
]])
break
;
int
temp
=
unmerged
[
j
+
1
];
unmerged
[
j
+
1
]
=
unmerged
[
j
];
unmerged
[
j
]
=
temp
;
}
while
(
unmerged
.
size
()
>
0
)
{
int
sum
=
numBuffers
[
unmerged
.
back
()];
int
i
;
for
(
i
=
0
;
i
<
(
int
)
unmerged
.
size
()
-
1
;
i
++
)
{
if
(
sum
+
numBuffers
[
unmerged
[
i
]]
>
bufferLimit
)
break
;
sum
+=
numBuffers
[
unmerged
[
i
]];
}
forceSets
.
push_back
(
vector
<
int
>
());
for
(
int
j
=
0
;
j
<
i
;
j
++
)
forceSets
.
back
().
push_back
(
unmerged
[
j
]);
forceSets
.
back
().
push_back
(
unmerged
.
back
());
for
(
int
j
=
0
;
j
<
i
;
j
++
)
unmerged
.
erase
(
unmerged
.
begin
());
unmerged
.
pop_back
();
}
}
// Update the buffer indices based on merged sets.
bufferIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
(
int
)
forceSets
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
forceSets
[
i
].
size
();
j
++
)
{
int
force
=
forceSets
[
i
][
j
];
int
numBonds
=
forceAtoms
[
force
].
size
();
int
numAtoms
=
forceAtoms
[
force
][
0
].
size
();
int
width
=
indexWidth
[
force
];
for
(
int
k
=
0
;
k
<
j
;
k
++
)
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
bufferVec
[
force
][
bond
*
width
+
atom
]
+=
bufferCounter
[
forceSets
[
i
][
k
]][
forceAtoms
[
force
][
bond
][
atom
]];
bufferIndices
[
force
].
initialize
<
cl_uint
>
(
context
,
bufferVec
[
force
].
size
(),
"bondedBufferIndices"
);
bufferIndices
[
force
].
upload
(
bufferVec
[
force
]);
}
// Create the kernels.
// Create the kernel.
for
(
auto
&
set
:
forceSets
)
{
int
setSize
=
set
.
size
();
stringstream
s
;
s
<<
"#ifdef SUPPORTS_64_BIT_ATOMICS
\n
"
;
s
<<
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
\n
"
;
s
<<
"#endif
\n
"
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
string
bufferType
=
(
context
.
getSupports64BitGlobalAtomics
()
?
"long"
:
"real4"
);
s
<<
"__kernel void computeBondedForces(__global "
<<
bufferType
<<
"* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
if
(
energyParameterDerivatives
.
size
()
>
0
)
s
<<
", __global mixed* restrict energyParamDerivs"
;
s
<<
") {
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
}
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
s
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
stringstream
s
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
"__kernel void computeBondedForces(__global long* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
force
;
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
if
(
energyParameterDerivatives
.
size
()
>
0
)
s
<<
", __global mixed* restrict energyParamDerivs"
;
s
<<
") {
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
s
<<
createForceSource
(
force
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
index
=
0
;
index
<
numDerivs
;
index
++
)
if
(
allParamDerivNames
[
index
]
==
energyParameterDerivatives
[
i
])
s
<<
"energyParamDerivs[get_global_id(0)*"
<<
numDerivs
<<
"+"
<<
index
<<
"] += energyParamDeriv"
<<
i
<<
";
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernel
=
cl
::
Kernel
(
program
,
"computeBondedForces"
);
forceAtoms
.
clear
();
forceSource
.
clear
();
}
...
...
@@ -247,7 +158,6 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
s
<<
" "
<<
indexType
<<
" atoms = atomIndices"
<<
forceIndex
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
" real4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
...
...
@@ -255,17 +165,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
if
(
context
.
getSupports64BitGlobalAtomics
())
{
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
}
else
{
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
" real4 force = forceBuffers[offset];
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
}
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
s
<<
" }
\n
"
;
}
s
<<
"}
\n
"
;
...
...
@@ -277,43 +179,32 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
return
;
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
for
(
int
i
=
0
;
i
<
(
int
)
forceSets
.
size
();
i
++
)
{
int
index
=
0
;
cl
::
Kernel
&
kernel
=
kernels
[
i
];
if
(
context
.
getSupports64BitGlobalAtomics
())
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getLongForceBuffer
().
getDeviceBuffer
());
else
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
index
+=
6
;
for
(
int
j
=
0
;
j
<
(
int
)
forceSets
[
i
].
size
();
j
++
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
forceSets
[
i
][
j
]].
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bufferIndices
[
forceSets
[
i
][
j
]].
getDeviceBuffer
());
}
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
}
int
index
=
0
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getLongForceBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
index
+=
6
;
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
j
].
getDeviceBuffer
());
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
}
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
{
cl
::
Kernel
&
kernel
=
kernels
[
i
];
kernel
.
setArg
<
cl_int
>
(
3
,
groups
);
if
(
context
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
4
,
context
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
5
,
context
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
6
,
context
.
getPeriodicBoxVecXDouble
());
kernel
.
setArg
<
mm_double4
>
(
7
,
context
.
getPeriodicBoxVecYDouble
());
kernel
.
setArg
<
mm_double4
>
(
8
,
context
.
getPeriodicBoxVecZDouble
());
}
else
{
kernel
.
setArg
<
mm_float4
>
(
4
,
context
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
5
,
context
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
6
,
context
.
getPeriodicBoxVecX
());
kernel
.
setArg
<
mm_float4
>
(
7
,
context
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
8
,
context
.
getPeriodicBoxVecZ
());
}
context
.
executeKernel
(
kernels
[
i
],
maxBonds
);
kernel
.
setArg
<
cl_int
>
(
3
,
groups
);
if
(
context
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
4
,
context
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
5
,
context
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
6
,
context
.
getPeriodicBoxVecXDouble
());
kernel
.
setArg
<
mm_double4
>
(
7
,
context
.
getPeriodicBoxVecYDouble
());
kernel
.
setArg
<
mm_double4
>
(
8
,
context
.
getPeriodicBoxVecZDouble
());
}
else
{
kernel
.
setArg
<
mm_float4
>
(
4
,
context
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
5
,
context
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
6
,
context
.
getPeriodicBoxVecX
());
kernel
.
setArg
<
mm_float4
>
(
7
,
context
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
8
,
context
.
getPeriodicBoxVecZ
());
}
context
.
executeKernel
(
kernel
,
maxBonds
);
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment