Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bd22eada
Commit
bd22eada
authored
Jun 20, 2012
by
Peter Eastman
Browse files
Continuing to implement new CUDA platform: CustomNonbondedForce, CustomHbondForce, CustomIntegrator
parent
8eb6850d
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
3519 additions
and
1697 deletions
+3519
-1697
platforms/cuda2/src/CudaArray.cpp
platforms/cuda2/src/CudaArray.cpp
+1
-1
platforms/cuda2/src/CudaArray.h
platforms/cuda2/src/CudaArray.h
+2
-2
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+4
-0
platforms/cuda2/src/CudaKernelFactory.cpp
platforms/cuda2/src/CudaKernelFactory.cpp
+6
-6
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+1532
-1460
platforms/cuda2/src/CudaKernels.h
platforms/cuda2/src/CudaKernels.h
+187
-186
platforms/cuda2/src/CudaParameterSet.cpp
platforms/cuda2/src/CudaParameterSet.cpp
+56
-37
platforms/cuda2/src/CudaParameterSet.h
platforms/cuda2/src/CudaParameterSet.h
+7
-5
platforms/cuda2/src/kernels/customHbondForce.cu
platforms/cuda2/src/kernels/customHbondForce.cu
+241
-0
platforms/cuda2/src/kernels/customIntegrator.cu
platforms/cuda2/src/kernels/customIntegrator.cu
+70
-0
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
+4
-0
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
+35
-0
platforms/cuda2/src/kernels/customNonbonded.cu
platforms/cuda2/src/kernels/customNonbonded.cu
+9
-0
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
+249
-0
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
+681
-0
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
+435
-0
No files found.
platforms/cuda2/src/CudaArray.cpp
View file @
bd22eada
...
...
@@ -53,7 +53,7 @@ CudaArray::~CudaArray() {
}
}
void
CudaArray
::
upload
(
void
*
data
,
bool
blocking
)
{
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
CUresult
result
;
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
...
...
platforms/cuda2/src/CudaArray.h
View file @
bd22eada
...
...
@@ -94,7 +94,7 @@ public:
* Copy the values in a vector to the device memory.
*/
template
<
class
T
>
void
upload
(
std
::
vector
<
T
>&
data
)
{
void
upload
(
const
std
::
vector
<
T
>&
data
)
{
if
(
sizeof
(
T
)
!=
elementSize
||
data
.
size
()
!=
size
)
throw
OpenMMException
(
"Error uploading array "
+
name
+
": The specified vector does not match the size of the array"
);
upload
(
&
data
[
0
],
true
);
...
...
@@ -117,7 +117,7 @@ public:
* @param blocking if true, this call will block until the transfer is complete. If false,
* the source array must be in page-locked memory.
*/
void
upload
(
void
*
data
,
bool
blocking
=
true
);
void
upload
(
const
void
*
data
,
bool
blocking
=
true
);
/**
* Copy the values in the device memory to an array.
*
...
...
platforms/cuda2/src/CudaContext.cpp
View file @
bd22eada
...
...
@@ -945,6 +945,10 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
reorderListeners
[
i
]
->
execute
();
}
/**
 * Register a listener by appending it to reorderListeners.
 *
 * The visible tail of reorderAtoms() calls execute() on the entries of that list.
 * NOTE(review): nothing visible here deletes the listener; confirm who owns the pointer.
 *
 * @param listener  the listener to append
 */
void
CudaContext
::
addReorderListener
(
ReorderListener
*
listener
)
{
reorderListeners
.
push_back
(
listener
);
}
struct
CudaContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/cuda2/src/CudaKernelFactory.cpp
View file @
bd22eada
...
...
@@ -94,16 +94,16 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcCustomTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
//
if (name == CalcCustomNonbondedForceKernel::Name())
//
return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem());
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
return
new
CudaCalcCustomNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
// if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
return
new
CudaCalcCustomExternalForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
//
if (name == CalcCustomHbondForceKernel::Name())
//
return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem());
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
return
new
CudaCalcCustomHbondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
...
...
@@ -116,8 +116,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaIntegrateVariableVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVariableLangevinStepKernel
::
Name
())
return
new
CudaIntegrateVariableLangevinStepKernel
(
name
,
platform
,
cu
);
//
if (name == IntegrateCustomStepKernel::Name())
//
return new CudaIntegrateCustomStepKernel(name, platform, cu);
if
(
name
==
IntegrateCustomStepKernel
::
Name
())
return
new
CudaIntegrateCustomStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CudaApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
bd22eada
...
...
@@ -1669,277 +1669,277 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
cu
.
invalidateMolecules
();
}
//class CudaCustomNonbondedForceInfo : public CudaForceInfo {
/**
 * CudaForceInfo implementation describing a CustomNonbondedForce.
 *
 * An instance is handed to the context through cu.addForce() when the kernel is initialized.
 * It reports which particles carry identical per-particle parameters and exposes each
 * exclusion of the force as a two-particle group.
 * NOTE(review): presumably the context uses this to find interchangeable molecules when
 * reordering atoms; confirm against CudaForceInfo.
 */
class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public:
    /**
     * @param force  the force being described; only a reference is stored, so it must
     *               outlive this object
     */
    CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
    }
    /**
     * @return true if both particles have exactly the same per-particle parameter values
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        vector<double> params1;
        vector<double> params2;
        force.getParticleParameters(particle1, params1);
        force.getParticleParameters(particle2, params2);
        // Vector equality compares lengths before elements, so it can never index past
        // the end of the shorter vector the way a loop bounded by params1.size() could.
        return params1 == params2;
    }
    /**
     * @return the number of groups, which is one per exclusion
     */
    int getNumParticleGroups() {
        return force.getNumExclusions();
    }
    /**
     * Fill `particles` with the two particles of the exclusion at position `index`.
     */
    void getParticlesInGroup(int index, vector<int>& particles) {
        int particle1, particle2;
        force.getExclusionParticles(index, particle1, particle2);
        particles.resize(2);
        particles[0] = particle1;
        particles[1] = particle2;
    }
    /**
     * Exclusions carry no parameters of their own, so any two groups are reported as identical.
     */
    bool areGroupsIdentical(int group1, int group2) {
        return true;
    }
private:
    const CustomNonbondedForce& force;
};
/**
 * Release the device-side storage owned by this kernel: the per-particle parameter set,
 * the global parameter array, the tabulated function parameter array, and every
 * tabulated function table.
 */
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
    cu.setAsCurrent();
    // Deleting a null pointer is a no-op, so no explicit NULL guards are required.
    delete params;
    delete globals;
    delete tabulatedFunctionParams;
    for (int table = 0, numTables = (int) tabulatedFunctions.size(); table < numTables; ++table)
        delete tabulatedFunctions[table];
}
void
CudaCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
cu
.
setAsCurrent
();
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
string
prefix
=
"custom"
+
cu
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
int
numParticles
=
force
.
getNumParticles
();
params
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerParticleParameters
(),
numParticles
,
"customNonbondedParameters"
);
if
(
force
.
getNumGlobalParameters
()
>
0
)
globals
=
CudaArray
::
create
<
float
>
(
cu
,
force
.
getNumGlobalParameters
(),
"customNonbondedGlobals"
);
vector
<
vector
<
float
>
>
paramVector
(
numParticles
);
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
vector
<
double
>
parameters
;
force
.
getParticleParameters
(
i
,
parameters
);
paramVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
paramVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
exclusionList
[
i
].
push_back
(
i
);
}
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
int
particle1
,
particle2
;
force
.
getExclusionParticles
(
i
,
particle1
,
particle2
);
exclusionList
[
particle1
].
push_back
(
particle2
);
exclusionList
[
particle2
].
push_back
(
particle1
);
}
params
->
setParameterValues
(
paramVector
);
// Record the tabulated functions.
CudaExpressionUtilities
::
FunctionPlaceholder
fp
;
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
float4
>
tabulatedFunctionParamsVec
(
force
.
getNumFunctions
());
for
(
int
i
=
0
;
i
<
force
.
getNumFunctions
();
i
++
)
{
string
name
;
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
cu
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
make_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
float4
>
f
=
cu
.
getExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float4
>
(
cu
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDevicePointer
()));
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
CudaArray
::
create
<
float4
>
(
cu
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float"
,
4
,
sizeof
(
float4
),
tabulatedFunctionParams
->
getDevicePointer
()));
}
// Record information for the expressions.
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
if
(
globals
!=
NULL
)
globals
->
upload
(
globalParamValues
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
CutoffNonPeriodic
);
Lepton
::
ParsedExpression
energyExpression
=
Lepton
::
Parser
::
parse
(
force
.
getEnergyFunction
(),
functions
).
optimize
();
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"r"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
forceExpressions
;
forceExpressions
[
"tempEnergy += "
]
=
energyExpression
;
forceExpressions
[
"tempForce -= "
]
=
forceExpression
;
// Create the kernels.
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variables
;
ExpressionTreeNode
rnode
(
new
Operation
::
Variable
(
"r"
));
variables
.
push_back
(
make_pair
(
rnode
,
"r"
));
variables
.
push_back
(
make_pair
(
ExpressionTreeNode
(
new
Operation
::
Square
(),
rnode
),
"r2"
));
variables
.
push_back
(
make_pair
(
ExpressionTreeNode
(
new
Operation
::
Reciprocal
(),
rnode
),
"invR"
));
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
{
const
string
&
name
=
force
.
getPerParticleParameterName
(
i
);
variables
.
push_back
(
makeVariable
(
name
+
"1"
,
prefix
+
"params"
+
params
->
getParameterSuffix
(
i
,
"1"
)));
variables
.
push_back
(
makeVariable
(
name
+
"2"
,
prefix
+
"params"
+
params
->
getParameterSuffix
(
i
,
"2"
)));
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
stringstream
compute
;
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbonded
,
replacements
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cu
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
}
cu
.
addForce
(
new
CudaCustomNonbondedForceInfo
(
force
));
}
/**
 * Bring the device copy of the global parameters up to date with the context.
 *
 * Each global parameter is read from the context and cached in globalParamValues; the
 * array is uploaded again only if at least one value differs from the cached one.
 *
 * @param context        the context to read the current global parameter values from
 * @param includeForces  not used by this method
 * @param includeEnergy  not used by this method
 * @return always 0.0
 *         NOTE(review): the interaction itself is registered with the nonbonded
 *         utilities in initialize(), so the energy is presumably accumulated there; confirm.
 */
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    // With no global parameters there is nothing to synchronize.
    if (globals == NULL)
        return 0.0;

    bool needsUpload = false;
    const int numGlobals = (int) globalParamNames.size();
    for (int index = 0; index < numGlobals; ++index) {
        const float current = (float) context.getParameter(globalParamNames[index]);
        if (current != globalParamValues[index])
            needsUpload = true;
        globalParamValues[index] = current;
    }
    if (needsUpload)
        globals->upload(globalParamValues);
    return 0.0;
}
/**
 * Re-read the per-particle parameters from the force and store them in the parameter set.
 *
 * @param context  not used by this method
 * @param force    the CustomNonbondedForce to copy the per-particle parameters from
 * @throws OpenMMException if the force's particle count differs from the context's atom count
 */
void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
    cu.setAsCurrent();
    const int numParticles = force.getNumParticles();
    if (cu.getNumAtoms() != numParticles)
        throw OpenMMException("updateParametersInContext: The number of particles has changed");

    // Convert each particle's double-precision parameters to single precision.

    vector<vector<float> > paramVector(numParticles);
    vector<double> parameters;
    for (int atom = 0; atom < numParticles; ++atom) {
        force.getParticleParameters(atom, parameters);
        vector<float>& row = paramVector[atom];
        row.resize(parameters.size());
        for (int k = 0; k < (int) parameters.size(); ++k)
            row[k] = (float) parameters[k];
    }
    params->setParameterValues(paramVector);

    // Particles that used to be identical may no longer be, so the current reordering may be invalid.

    cu.invalidateMolecules();
}
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
// Cuda
CustomNonbonded
ForceInfo(int requiredBuffers, const
CustomNonbonded
Force& force) : CudaForceInfo(requiredBuffers), force(force) {
// Cuda
GBSAOBC
ForceInfo(int requiredBuffers, const
GBSAOBC
Force& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// vector<double> params1;
// vector<double> params2;
// force.getParticleParameters(particle1, params1);
// force.getParticleParameters(particle2, params2);
// for (int i = 0; i < (int) params1.size(); i++)
// if (params1[i] != params2[i])
// return false;
// return true;
// }
// int getNumParticleGroups() {
// return force.getNumExclusions();
// }
// void getParticlesInGroup(int index, vector<int>& particles) {
// int particle1, particle2;
// force.getExclusionParticles(index, particle1, particle2);
// particles.resize(2);
// particles[0] = particle1;
// particles[1] = particle2;
// }
// bool areGroupsIdentical(int group1, int group2) {
// return true;
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const
CustomNonbonded
Force& force;
// const
GBSAOBC
Force& force;
//};
//
//CudaCalc
CustomNonbonded
ForceKernel::~CudaCalc
CustomNonbonded
ForceKernel() {
//CudaCalc
GBSAOBC
ForceKernel::~CudaCalc
GBSAOBC
ForceKernel() {
// cu.setAsCurrent();
// if (params != NULL)
// delete params;
// if (globals != NULL)
// delete globals;
// if (tabulatedFunctionParams != NULL)
// delete tabulatedFunctionParams;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// delete tabulatedFunctions[i];
// if (bornSum != NULL)
// delete bornSum;
// if (longBornSum != NULL)
// delete longBornSum;
// if (bornRadii != NULL)
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//
//void CudaCalc
CustomNonbonded
ForceKernel::initialize(const System& system, const
CustomNonbonded
Force& force) {
//void CudaCalc
GBSAOBC
ForceKernel::initialize(const System& system, const
GBSAOBC
Force& force) {
// cu.setAsCurrent();
// int forceIndex;
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
// ;
// string prefix = "custom"+cu.intToString(forceIndex)+"_";
//
// // Record parameters and exclusions.
//
// if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
// if (force.getNumGlobalParameters() > 0)
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
// vector<vector<cl_float> > paramVector(numParticles);
// vector<vector<int> > exclusionList(numParticles);
// vector<mm_float2> paramsVector(numParticles);
// const double dielectricOffset = 0.009;
// for (int i = 0; i < numParticles; i++) {
// vector<double> parameters;
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// exclusionList[i].push_back(i);
// }
// for (int i = 0; i < force.getNumExclusions(); i++) {
// int particle1, particle2;
// force.getExclusionParticles(i, particle1, particle2);
// exclusionList[particle1].push_back(particle2);
// exclusionList[particle2].push_back(particle1);
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// posq[i].w = (float) charge;
// }
// params->setParameterValues(paramVector);
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
// // Record the tabulated functions.
//
// CudaExpressionUtilities::FunctionPlaceholder fp;
// map<string, Lepton::CustomFunction*> functions;
// vector<pair<string, string> > functionDefinitions;
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// for (int i = 0; i < force.getNumFunctions(); i++) {
// string name;
// vector<double> values;
// double min, max;
// force.getFunctionParameters(i, name, values, min, max);
// string arrayName = prefix+"table"+cu.intToString(i);
// functionDefinitions.push_back(make_pair(name, arrayName));
// functions[name] = &fp;
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction"));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
// }
// if (force.getNumFunctions() > 0) {
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDevicePointer()));
// }
//
// // Record information for the expressions.
//
// globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamNames[i] = force.getGlobalParameterName(i);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
// }
// if (globals != NULL)
// globals->upload(globalParamValues);
// bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
// map<string, Lepton::ParsedExpression> forceExpressions;
// forceExpressions["tempEnergy += "] = energyExpression;
// forceExpressions["tempForce -= "] = forceExpression;
//
// // Create the kernels.
//
// vector<pair<ExpressionTreeNode, string> > variables;
// ExpressionTreeNode rnode(new Operation::Variable("r"));
// variables.push_back(make_pair(rnode, "r"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// const string& name = force.getPerParticleParameterName(i);
// variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
// variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
// }
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// const string& name = force.getGlobalParameterName(i);
// string value = "globals["+cu.intToString(i)+"]";
// variables.push_back(makeVariable(name, prefix+value));
// }
// stringstream compute;
// compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
// map<string, string> replacements;
// replacements["COMPUTE_FORCE"] = compute.str();
// string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// }
// if (globals != NULL) {
// globals->upload(globalParamValues);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDevicePointer()));
// }
// cu.addForce(new CudaCustomNonbondedForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force));
//}
//
//double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// return 0.0;
//}
//
//void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
// cu.setAsCurrent();
// int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
//
// // Record the per-particle parameters.
//
// vector<vector<cl_float> > paramVector(numParticles);
// vector<double> parameters;
// for (int i = 0; i < numParticles; i++) {
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// }
// params->setParameterValues(paramVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
//
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
// CudaGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const GBSAOBCForce& force;
//};
//
//CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// cu.setAsCurrent();
// if (params != NULL)
// delete params;
// if (bornSum != NULL)
// delete bornSum;
// if (longBornSum != NULL)
// delete longBornSum;
// if (bornRadii != NULL)
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//
//void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// cu.setAsCurrent();
// if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// vector<mm_float2> paramsVector(numParticles);
// const double dielectricOffset = 0.009;
// for (int i = 0; i < numParticles; i++) {
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// posq[i].w = (float) charge;
// }
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// if (!hasCreatedKernels) {
// // These kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized at that point.
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// if (!hasCreatedKernels) {
// // These kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized at that point.
//
// hasCreatedKernels = true;
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
...
...
@@ -3172,557 +3172,524 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
cu
.
invalidateMolecules
();
}
//class CudaCustomHbondForceInfo : public CudaForceInfo {
//public:
// CudaCustomHbondForceInfo(int requiredBuffers, const CustomHbondForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// return true;
// }
// int getNumParticleGroups() {
// return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions();
// }
// void getParticlesInGroup(int index, vector<int>& particles) {
// int p1, p2, p3;
// vector<double> parameters;
// if (index < force.getNumDonors()) {
// force.getDonorParameters(index, p1, p2, p3, parameters);
// particles.clear();
// particles.push_back(p1);
// if (p2 > -1)
// particles.push_back(p2);
// if (p3 > -1)
// particles.push_back(p3);
// return;
// }
// index -= force.getNumDonors();
// if (index < force.getNumAcceptors()) {
// force.getAcceptorParameters(index, p1, p2, p3, parameters);
// particles.clear();
// particles.push_back(p1);
// if (p2 > -1)
// particles.push_back(p2);
// if (p3 > -1)
// particles.push_back(p3);
// return;
// }
// index -= force.getNumAcceptors();
// int donor, acceptor;
// force.getExclusionParticles(index, donor, acceptor);
// particles.clear();
// force.getDonorParameters(donor, p1, p2, p3, parameters);
// particles.push_back(p1);
// if (p2 > -1)
// particles.push_back(p2);
// if (p3 > -1)
// particles.push_back(p3);
// force.getAcceptorParameters(acceptor, p1, p2, p3, parameters);
// particles.push_back(p1);
// if (p2 > -1)
// particles.push_back(p2);
// if (p3 > -1)
// particles.push_back(p3);
// }
// bool areGroupsIdentical(int group1, int group2) {
// int p1, p2, p3;
// vector<double> params1, params2;
// if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) {
// force.getDonorParameters(group1, p1, p2, p3, params1);
// force.getDonorParameters(group2, p1, p2, p3, params2);
// return (params1 == params2 && params1 == params2);
// }
// if (group1 < force.getNumDonors() || group2 < force.getNumDonors())
// return false;
// group1 -= force.getNumDonors();
// group2 -= force.getNumDonors();
// if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) {
// force.getAcceptorParameters(group1, p1, p2, p3, params1);
// force.getAcceptorParameters(group2, p1, p2, p3, params2);
// return (params1 == params2 && params1 == params2);
// }
// if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors())
// return false;
// return true;
// }
//private:
// const CustomHbondForce& force;
//};
//
//CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
// cu.setAsCurrent();
// if (donorParams != NULL)
// delete donorParams;
// if (acceptorParams != NULL)
// delete acceptorParams;
// if (donors != NULL)
// delete donors;
// if (acceptors != NULL)
// delete acceptors;
// if (donorBufferIndices != NULL)
// delete donorBufferIndices;
// if (acceptorBufferIndices != NULL)
// delete acceptorBufferIndices;
// if (globals != NULL)
// delete globals;
// if (donorExclusions != NULL)
// delete donorExclusions;
// if (acceptorExclusions != NULL)
// delete acceptorExclusions;
// if (tabulatedFunctionParams != NULL)
// delete tabulatedFunctionParams;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// delete tabulatedFunctions[i];
//}
//
//static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
// computeDonor << value;
// computeAcceptor << value;
//}
//
//static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) {
// string forceNames[] = {"f1", "f2", "f3"};
// if (atom < 3)
// applyToAcceptor << forceNames[atom]<<".xyz += "<<value<<";\n";
// else
// applyToDonor << forceNames[atom-3]<<".xyz += "<<value<<";\n";
//}
//
//void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
// // Record the lists of donors and acceptors, and the parameters for each one.
//
// cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
// numDonors = endIndex-startIndex;
// numAcceptors = force.getNumAcceptors();
// if (numDonors == 0 || numAcceptors == 0)
// return;
// int numParticles = system.getNumParticles();
// donors = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonors");
// acceptors = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptors");
// donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
// acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
// if (force.getNumGlobalParameters() > 0)
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customHbondGlobals", false, CL_MEM_READ_ONLY);
// vector<vector<cl_float> > donorParamVector(numDonors);
// vector<mm_int4> donorVector(numDonors);
// for (int i = 0; i < numDonors; i++) {
// vector<double> parameters;
// force.getDonorParameters(startIndex+i, donorVector[i].x, donorVector[i].y, donorVector[i].z, parameters);
// donorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// donorParamVector[i][j] = (cl_float) parameters[j];
// }
// donors->upload(donorVector);
// donorParams->setParameterValues(donorParamVector);
// vector<vector<cl_float> > acceptorParamVector(numAcceptors);
// vector<mm_int4> acceptorVector(numAcceptors);
// for (int i = 0; i < numAcceptors; i++) {
// vector<double> parameters;
// force.getAcceptorParameters(i, acceptorVector[i].x, acceptorVector[i].y, acceptorVector[i].z, parameters);
// acceptorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// acceptorParamVector[i][j] = (cl_float) parameters[j];
// }
// acceptors->upload(acceptorVector);
// acceptorParams->setParameterValues(acceptorParamVector);
//
// // Select an output buffer index for each donor and acceptor.
//
// donorBufferIndices = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorBuffers");
// acceptorBufferIndices = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptorBuffers");
// vector<mm_int4> donorBufferVector(numDonors);
// vector<mm_int4> acceptorBufferVector(numAcceptors);
// vector<int> donorBufferCounter(numParticles, 0);
// for (int i = 0; i < numDonors; i++)
// donorBufferVector[i] = mm_int4(donorVector[i].x > -1 ? donorBufferCounter[donorVector[i].x]++ : 0,
// donorVector[i].y > -1 ? donorBufferCounter[donorVector[i].y]++ : 0,
// donorVector[i].z > -1 ? donorBufferCounter[donorVector[i].z]++ : 0, 0);
// vector<int> acceptorBufferCounter(numParticles, 0);
// for (int i = 0; i < numAcceptors; i++)
// acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
// acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
// acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
// donorBufferIndices->upload(donorBufferVector);
// acceptorBufferIndices->upload(acceptorBufferVector);
// int maxBuffers = 1;
// for (int i = 0; i < (int) donorBufferCounter.size(); i++)
// maxBuffers = max(maxBuffers, donorBufferCounter[i]);
// for (int i = 0; i < (int) acceptorBufferCounter.size(); i++)
// maxBuffers = max(maxBuffers, acceptorBufferCounter[i]);
// cu.addForce(new CudaCustomHbondForceInfo(maxBuffers, force));
//
// // Record exclusions.
//
// vector<mm_int4> donorExclusionVector(numDonors, mm_int4(-1, -1, -1, -1));
// vector<mm_int4> acceptorExclusionVector(numAcceptors, mm_int4(-1, -1, -1, -1));
// for (int i = 0; i < force.getNumExclusions(); i++) {
// int donor, acceptor;
// force.getExclusionParticles(i, donor, acceptor);
// if (donor < startIndex || donor >= endIndex)
// continue;
// donor -= startIndex;
// if (donorExclusionVector[donor].x == -1)
// donorExclusionVector[donor].x = acceptor;
// else if (donorExclusionVector[donor].y == -1)
// donorExclusionVector[donor].y = acceptor;
// else if (donorExclusionVector[donor].z == -1)
// donorExclusionVector[donor].z = acceptor;
// else if (donorExclusionVector[donor].w == -1)
// donorExclusionVector[donor].w = acceptor;
// else
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per donor");
// if (acceptorExclusionVector[acceptor].x == -1)
// acceptorExclusionVector[acceptor].x = donor;
// else if (acceptorExclusionVector[acceptor].y == -1)
// acceptorExclusionVector[acceptor].y = donor;
// else if (acceptorExclusionVector[acceptor].z == -1)
// acceptorExclusionVector[acceptor].z = donor;
// else if (acceptorExclusionVector[acceptor].w == -1)
// acceptorExclusionVector[acceptor].w = donor;
// else
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
// }
// donorExclusions = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorExclusions");
// acceptorExclusions = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
// donorExclusions->upload(donorExclusionVector);
// acceptorExclusions->upload(acceptorExclusionVector);
//
// // Record the tabulated functions.
//
// CudaExpressionUtilities::FunctionPlaceholder fp;
// map<string, Lepton::CustomFunction*> functions;
// vector<pair<string, string> > functionDefinitions;
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// stringstream tableArgs;
// for (int i = 0; i < force.getNumFunctions(); i++) {
// string name;
// vector<double> values;
// double min, max;
// force.getFunctionParameters(i, name, values, min, max);
// string arrayName = "table"+cu.intToString(i);
// functionDefinitions.push_back(make_pair(name, arrayName));
// functions[name] = &fp;
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction"));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// tableArgs << ", __global const float4* restrict " << arrayName;
// }
// if (force.getNumFunctions() > 0) {
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// tableArgs << ", __global const float4* restrict functionParams";
// }
//
// // Record information about parameters.
//
// globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamNames[i] = force.getGlobalParameterName(i);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
// }
// if (globals != NULL)
// globals->upload(globalParamValues);
// map<string, string> variables;
// for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
// const string& name = force.getPerDonorParameterName(i);
// variables[name] = "donorParams"+donorParams->getParameterSuffix(i);
// }
// for (int i = 0; i < force.getNumPerAcceptorParameters(); i++) {
// const string& name = force.getPerAcceptorParameterName(i);
// variables[name] = "acceptorParams"+acceptorParams->getParameterSuffix(i);
// }
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// const string& name = force.getGlobalParameterName(i);
// variables[name] = "globals["+cu.intToString(i)+"]";
// }
//
// // Now to generate the kernel. First, it needs to calculate all distances, angles,
// // and dihedrals the expression depends on.
//
// map<string, vector<int> > distances;
// map<string, vector<int> > angles;
// map<string, vector<int> > dihedrals;
// Lepton::ParsedExpression energyExpression = CustomHbondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
// map<string, Lepton::ParsedExpression> forceExpressions;
// set<string> computedDeltas;
// computedDeltas.insert("D1A1");
// string atomNames[] = {"A1", "A2", "A3", "D1", "D2", "D3"};
// string atomNamesLower[] = {"a1", "a2", "a3", "d1", "d2", "d3"};
// stringstream computeDonor, computeAcceptor, extraArgs;
// int index = 0;
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
// if (computedDeltas.count(deltaName) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
// computedDeltas.insert(deltaName);
// }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r_"+deltaName+" = sqrt(delta"+deltaName+".w);\n");
// variables[iter->first] = "r_"+deltaName;
// forceExpressions["float dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// }
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
// string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
// if (computedDeltas.count(deltaName1) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
// computedDeltas.insert(deltaName1);
// }
// if (computedDeltas.count(deltaName2) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
// computedDeltas.insert(deltaName2);
// }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
// variables[iter->first] = angleName;
// forceExpressions["float dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// }
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
// string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
// if (computedDeltas.count(deltaName1) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
// computedDeltas.insert(deltaName1);
// }
// if (computedDeltas.count(deltaName2) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
// computedDeltas.insert(deltaName2);
// }
// if (computedDeltas.count(deltaName3) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
// computedDeltas.insert(deltaName3);
// }
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, dihedralName+" *= (delta"+deltaName1+".x*"+crossName2+".x + delta"+deltaName1+".y*"+crossName2+".y + delta"+deltaName1+".z*"+crossName2+".z < 0 ? -1 : 1);\n");
// variables[iter->first] = dihedralName;
// forceExpressions["float dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
// }
//
// // Next it needs to load parameters from global memory.
//
// if (force.getNumGlobalParameters() > 0)
// extraArgs << ", __global const float* restrict globals";
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
// }
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
// }
//
// // Now evaluate the expressions.
//
// computeAcceptor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
// forceExpressions["energy += "] = energyExpression;
// computeDonor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
//
// // Finally, apply forces to atoms.
//
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
// string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "-"+value);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], value);
// }
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float lengthCross = max(length(crossProd), 1e-6f);\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross0 = -cross(delta"+deltaName1+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross2 = cross(delta"+deltaName2+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross1 = -(deltaCross0+deltaCross2);\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "deltaCross0.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "deltaCross1.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "deltaCross2.xyz");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// }
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r = sqrt(delta"+deltaName2+".w);\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 ff;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+cu.intToString(index)+"*r)/"+crossName1+".w;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.y = (delta"+deltaName1+".x*delta"+deltaName2+".x + delta"+deltaName1+".y*delta"+deltaName2+".y + delta"+deltaName1+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.z = (delta"+deltaName3+".x*delta"+deltaName2+".x + delta"+deltaName3+".y*delta"+deltaName2+".y + delta"+deltaName3+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+cu.intToString(index)+"*r)/"+crossName2+".w;\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF0 = ff.x*"+crossName1+";\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF3 = ff.w*"+crossName2+";\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 s = ff.y*internalF0 - ff.z*internalF3;\n");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "internalF0.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "s.xyz-internalF0.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "-s.xyz-internalF3.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[3], "internalF3.xyz");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// }
//
// // Generate the kernels.
//
// map<string, string> replacements;
// replacements["COMPUTE_DONOR_FORCE"] = computeDonor.str();
// replacements["COMPUTE_ACCEPTOR_FORCE"] = computeAcceptor.str();
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
// map<string, string> defines;
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// defines["NUM_DONORS"] = cu.intToString(numDonors);
// defines["NUM_ACCEPTORS"] = cu.intToString(numAcceptors);
// defines["M_PI"] = cu.doubleToString(M_PI);
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff) {
// defines["USE_CUTOFF"] = "1";
// defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
// }
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff && force.getNonbondedMethod() != CustomHbondForce::CutoffNonPeriodic)
// defines["USE_PERIODIC"] = "1";
// if (force.getNumExclusions() > 0)
// defines["USE_EXCLUSIONS"] = "1";
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customHbondForce, replacements), defines);
// donorKernel = cu.getKernel(module, "computeDonorForces");
// acceptorKernel = cu.getKernel(module, "computeAcceptorForces");
//}
//
//double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// if (numDonors == 0 || numAcceptors == 0)
// return 0.0;
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// if (!hasInitializedKernel) {
// hasInitializedKernel = true;
// int index = 0;
// donorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, donorExclusions->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, donorBufferIndices->getDevicePointer());
// donorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL);
// index += 2; // Periodic box size arguments are set when the kernel is executed.
// if (globals != NULL)
// donorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// }
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// }
// if (tabulatedFunctionParams != NULL) {
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
// }
// index = 0;
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorExclusions->getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorBufferIndices->getDevicePointer());
// acceptorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL);
// index += 2; // Periodic box size arguments are set when the kernel is executed.
// if (globals != NULL)
// acceptorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// }
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
// }
// if (tabulatedFunctionParams != NULL) {
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
// }
// }
// donorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize());
// donorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize());
// cu.executeKernel(donorKernel, max(numDonors, numAcceptors));
// acceptorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize());
// acceptorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize());
// cu.executeKernel(acceptorKernel, max(numDonors, numAcceptors));
// return 0.0;
//}
//
//void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
// cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
// if (numDonors != endIndex-startIndex)
// throw OpenMMException("updateParametersInContext: The number of donors has changed");
// if (numAcceptors != force.getNumAcceptors())
// throw OpenMMException("updateParametersInContext: The number of acceptors has changed");
//
// // Record the per-donor parameters.
//
// vector<vector<cl_float> > donorParamVector(numDonors);
// vector<double> parameters;
// for (int i = 0; i < numDonors; i++) {
// int d1, d2, d3;
// force.getDonorParameters(startIndex+i, d1, d2, d3, parameters);
// donorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// donorParamVector[i][j] = (cl_float) parameters[j];
// }
// donorParams->setParameterValues(donorParamVector);
//
// // Record the per-acceptor parameters.
//
// vector<vector<cl_float> > acceptorParamVector(numAcceptors);
// for (int i = 0; i < numAcceptors; i++) {
// int a1, a2, a3;
// force.getAcceptorParameters(i, a1, a2, a3, parameters);
// acceptorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// acceptorParamVector[i][j] = (cl_float) parameters[j];
// }
// acceptorParams->setParameterValues(acceptorParamVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
/**
 * CudaForceInfo implementation for a CustomHbondForce.
 *
 * Groups are numbered in three consecutive ranges:
 *   [0, numDonors)                                  one group per donor
 *   [numDonors, numDonors+numAcceptors)             one group per acceptor
 *   [numDonors+numAcceptors, ... +numExclusions)    one group per exclusion
 */
class CudaCustomHbondForceInfo : public CudaForceInfo {
public:
    CudaCustomHbondForceInfo(const CustomHbondForce& force) : force(force) {
    }
    /**
     * This force places no per-particle restriction: any two particles are reported as identical.
     */
    bool areParticlesIdentical(int particle1, int particle2) {
        return true;
    }
    /**
     * Total number of groups: donors + acceptors + exclusions.
     */
    int getNumParticleGroups() {
        return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions();
    }
    /**
     * Fill `particles` with the particle indices belonging to group `index`.
     *
     * A donor or acceptor group holds its (up to three) particles; an exclusion group
     * holds the particles of its donor followed by the particles of its acceptor.
     * Particle slots with a negative index are omitted.
     */
    void getParticlesInGroup(int index, vector<int>& particles) {
        int p1, p2, p3;
        vector<double> parameters;
        if (index < force.getNumDonors()) {
            force.getDonorParameters(index, p1, p2, p3, parameters);
            particles.clear();
            appendParticles(p1, p2, p3, particles);
            return;
        }
        index -= force.getNumDonors();
        if (index < force.getNumAcceptors()) {
            force.getAcceptorParameters(index, p1, p2, p3, parameters);
            particles.clear();
            appendParticles(p1, p2, p3, particles);
            return;
        }
        index -= force.getNumAcceptors();
        int donor, acceptor;
        force.getExclusionParticles(index, donor, acceptor);
        particles.clear();
        force.getDonorParameters(donor, p1, p2, p3, parameters);
        appendParticles(p1, p2, p3, particles);
        force.getAcceptorParameters(acceptor, p1, p2, p3, parameters);
        appendParticles(p1, p2, p3, particles);
    }
    /**
     * Report whether two groups are interchangeable.
     *
     * Two donors (or two acceptors) are identical when their parameter vectors are equal.
     * Groups of different kinds are never identical.  Two exclusion groups are always
     * reported as identical.
     * NOTE(review): the exclusion case does not compare anything about the two
     * exclusions -- confirm that this is intended.
     */
    bool areGroupsIdentical(int group1, int group2) {
        int p1, p2, p3;
        vector<double> params1, params2;
        if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) {
            force.getDonorParameters(group1, p1, p2, p3, params1);
            force.getDonorParameters(group2, p1, p2, p3, params2);
            return (params1 == params2);
        }
        if (group1 < force.getNumDonors() || group2 < force.getNumDonors())
            return false;
        group1 -= force.getNumDonors();
        group2 -= force.getNumDonors();
        if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) {
            force.getAcceptorParameters(group1, p1, p2, p3, params1);
            force.getAcceptorParameters(group2, p1, p2, p3, params2);
            return (params1 == params2);
        }
        if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors())
            return false;
        return true;
    }
private:
    /**
     * Append p1 unconditionally, then p2 and p3 only when they are greater than -1.
     */
    static void appendParticles(int p1, int p2, int p3, vector<int>& particles) {
        particles.push_back(p1);
        if (p2 > -1)
            particles.push_back(p2);
        if (p3 > -1)
            particles.push_back(p3);
    }
    const CustomHbondForce& force;
};
/**
 * Release every device-side object owned by this kernel.  Pointers that were never
 * allocated are null, and deleting a null pointer does nothing.
 */
CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
    // Make this kernel's CUDA context current before freeing anything that lives in it.
    cu.setAsCurrent();
    delete donorParams;
    delete acceptorParams;
    delete donors;
    delete acceptors;
    delete globals;
    delete donorExclusions;
    delete acceptorExclusions;
    delete tabulatedFunctionParams;
    for (size_t table = 0; table < tabulatedFunctions.size(); ++table)
        delete tabulatedFunctions[table];
}
/**
 * Append the same text to both the donor and the acceptor code streams.
 *
 * @param computeDonor     stream receiving the donor kernel body
 * @param computeAcceptor  stream receiving the acceptor kernel body
 * @param value            text to append to each stream
 */
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
    stringstream* const targets[] = {&computeDonor, &computeAcceptor};
    for (int which = 0; which < 2; ++which)
        *targets[which] << value;
}
/**
 * Emit a statement that accumulates a force contribution onto one atom.
 *
 * Atom indices 0-2 go to the acceptor stream and 3-5 go to the donor stream.
 * In both cases the statement emitted is
 * "fN += trim(<value>);" followed by a newline, where N is 1, 2 or 3.
 *
 * @param applyToDonor     stream receiving statements for atoms 3-5
 * @param applyToAcceptor  stream receiving statements for atoms 0-2
 * @param atom             atom index; callers must pass a value in [0, 5]
 * @param value            expression text placed inside trim(...)
 */
static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) {
    // A static table of C strings avoids building three std::string objects on every call.
    static const char* const forceNames[] = {"f1", "f2", "f3"};
    const bool isAcceptorAtom = (atom < 3);
    stringstream& target = (isAcceptorAtom ? applyToAcceptor : applyToDonor);
    target << forceNames[isAcceptorAtom ? atom : atom-3] << " += trim(" << value << ");\n";
}
void
CudaCalcCustomHbondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
)
{
// Record the lists of donors and acceptors, and the parameters for each one.
cu
.
setAsCurrent
();
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cu
.
getContextIndex
()
*
force
.
getNumDonors
()
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
force
.
getNumDonors
()
/
numContexts
;
numDonors
=
endIndex
-
startIndex
;
numAcceptors
=
force
.
getNumAcceptors
();
if
(
numDonors
==
0
||
numAcceptors
==
0
)
return
;
int
numParticles
=
system
.
getNumParticles
();
donors
=
CudaArray
::
create
<
int4
>
(
cu
,
numDonors
,
"customHbondDonors"
);
acceptors
=
CudaArray
::
create
<
int4
>
(
cu
,
numAcceptors
,
"customHbondAcceptors"
);
donorParams
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerDonorParameters
(),
numDonors
,
"customHbondDonorParameters"
);
acceptorParams
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerAcceptorParameters
(),
numAcceptors
,
"customHbondAcceptorParameters"
);
if
(
force
.
getNumGlobalParameters
()
>
0
)
globals
=
CudaArray
::
create
<
float
>
(
cu
,
force
.
getNumGlobalParameters
(),
"customHbondGlobals"
);
vector
<
vector
<
float
>
>
donorParamVector
(
numDonors
);
vector
<
int4
>
donorVector
(
numDonors
);
for
(
int
i
=
0
;
i
<
numDonors
;
i
++
)
{
vector
<
double
>
parameters
;
force
.
getDonorParameters
(
startIndex
+
i
,
donorVector
[
i
].
x
,
donorVector
[
i
].
y
,
donorVector
[
i
].
z
,
parameters
);
donorParamVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
donorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
}
donors
->
upload
(
donorVector
);
donorParams
->
setParameterValues
(
donorParamVector
);
vector
<
vector
<
float
>
>
acceptorParamVector
(
numAcceptors
);
vector
<
int4
>
acceptorVector
(
numAcceptors
);
for
(
int
i
=
0
;
i
<
numAcceptors
;
i
++
)
{
vector
<
double
>
parameters
;
force
.
getAcceptorParameters
(
i
,
acceptorVector
[
i
].
x
,
acceptorVector
[
i
].
y
,
acceptorVector
[
i
].
z
,
parameters
);
acceptorParamVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
acceptorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
}
acceptors
->
upload
(
acceptorVector
);
acceptorParams
->
setParameterValues
(
acceptorParamVector
);
cu
.
addForce
(
new
CudaCustomHbondForceInfo
(
force
));
// Record exclusions.
vector
<
int4
>
donorExclusionVector
(
numDonors
,
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
));
vector
<
int4
>
acceptorExclusionVector
(
numAcceptors
,
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
));
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
int
donor
,
acceptor
;
force
.
getExclusionParticles
(
i
,
donor
,
acceptor
);
if
(
donor
<
startIndex
||
donor
>=
endIndex
)
continue
;
donor
-=
startIndex
;
if
(
donorExclusionVector
[
donor
].
x
==
-
1
)
donorExclusionVector
[
donor
].
x
=
acceptor
;
else
if
(
donorExclusionVector
[
donor
].
y
==
-
1
)
donorExclusionVector
[
donor
].
y
=
acceptor
;
else
if
(
donorExclusionVector
[
donor
].
z
==
-
1
)
donorExclusionVector
[
donor
].
z
=
acceptor
;
else
if
(
donorExclusionVector
[
donor
].
w
==
-
1
)
donorExclusionVector
[
donor
].
w
=
acceptor
;
else
throw
OpenMMException
(
"CustomHbondForce: CudaPlatform does not support more than four exclusions per donor"
);
if
(
acceptorExclusionVector
[
acceptor
].
x
==
-
1
)
acceptorExclusionVector
[
acceptor
].
x
=
donor
;
else
if
(
acceptorExclusionVector
[
acceptor
].
y
==
-
1
)
acceptorExclusionVector
[
acceptor
].
y
=
donor
;
else
if
(
acceptorExclusionVector
[
acceptor
].
z
==
-
1
)
acceptorExclusionVector
[
acceptor
].
z
=
donor
;
else
if
(
acceptorExclusionVector
[
acceptor
].
w
==
-
1
)
acceptorExclusionVector
[
acceptor
].
w
=
donor
;
else
throw
OpenMMException
(
"CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor"
);
}
donorExclusions
=
CudaArray
::
create
<
int4
>
(
cu
,
numDonors
,
"customHbondDonorExclusions"
);
acceptorExclusions
=
CudaArray
::
create
<
int4
>
(
cu
,
numAcceptors
,
"customHbondAcceptorExclusions"
);
donorExclusions
->
upload
(
donorExclusionVector
);
acceptorExclusions
->
upload
(
acceptorExclusionVector
);
// Record the tabulated functions.
CudaExpressionUtilities
::
FunctionPlaceholder
fp
;
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
float4
>
tabulatedFunctionParamsVec
(
force
.
getNumFunctions
());
stringstream
tableArgs
;
for
(
int
i
=
0
;
i
<
force
.
getNumFunctions
();
i
++
)
{
string
name
;
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
"table"
+
cu
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
make_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
float4
>
f
=
cu
.
getExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float4
>
(
cu
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tableArgs
<<
", const float4* __restrict__ "
<<
arrayName
;
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
CudaArray
::
create
<
float4
>
(
cu
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
tableArgs
<<
", const float4* __restrict__ functionParams"
;
}
// Record information about parameters.
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
if
(
globals
!=
NULL
)
globals
->
upload
(
globalParamValues
);
map
<
string
,
string
>
variables
;
for
(
int
i
=
0
;
i
<
force
.
getNumPerDonorParameters
();
i
++
)
{
const
string
&
name
=
force
.
getPerDonorParameterName
(
i
);
variables
[
name
]
=
"donorParams"
+
donorParams
->
getParameterSuffix
(
i
);
}
for
(
int
i
=
0
;
i
<
force
.
getNumPerAcceptorParameters
();
i
++
)
{
const
string
&
name
=
force
.
getPerAcceptorParameterName
(
i
);
variables
[
name
]
=
"acceptorParams"
+
acceptorParams
->
getParameterSuffix
(
i
);
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
variables
[
name
]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map
<
string
,
vector
<
int
>
>
distances
;
map
<
string
,
vector
<
int
>
>
angles
;
map
<
string
,
vector
<
int
>
>
dihedrals
;
Lepton
::
ParsedExpression
energyExpression
=
CustomHbondForceImpl
::
prepareExpression
(
force
,
functions
,
distances
,
angles
,
dihedrals
);
map
<
string
,
Lepton
::
ParsedExpression
>
forceExpressions
;
set
<
string
>
computedDeltas
;
computedDeltas
.
insert
(
"D1A1"
);
string
atomNames
[]
=
{
"A1"
,
"A2"
,
"A3"
,
"D1"
,
"D2"
,
"D3"
};
string
atomNamesLower
[]
=
{
"a1"
,
"a2"
,
"a3"
,
"d1"
,
"d2"
,
"d3"
};
stringstream
computeDonor
,
computeAcceptor
,
extraArgs
;
int
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real r_"
+
deltaName
+
" = SQRT(delta"
+
deltaName
+
".w);
\n
"
);
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"real dEdDistance"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real "
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"real dEdAngle"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName1
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName2
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName3
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName2
);
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
computedDeltas
.
insert
(
deltaName3
);
}
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real "
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
dihedralName
+
" *= (delta"
+
deltaName1
+
".x*"
+
crossName2
+
".x + delta"
+
deltaName1
+
".y*"
+
crossName2
+
".y + delta"
+
deltaName1
+
".z*"
+
crossName2
+
".z < 0 ? -1 : 1);
\n
"
);
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"real dEdDihedral"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
// Next it needs to load parameters from global memory.
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", const float* __restrict__ globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
const
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
extraArgs
<<
", const "
+
buffer
.
getType
()
+
"* __restrict__ donor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
cu
.
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
const
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
extraArgs
<<
", const "
+
buffer
.
getType
()
+
"* __restrict__ acceptor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
cu
.
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
// Now evaluate the expressions.
computeAcceptor
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
forceExpressions
[
"energy += "
]
=
energyExpression
;
computeDonor
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
// Finally, apply forces to atoms.
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
value
=
"(dEdDistance"
+
cu
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
;
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"-"
+
value
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
value
);
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real lengthCross = max(SQRT(dot(crossProd,crossProd)), 1e-6f);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross0 = -cross(trim(delta"
+
deltaName1
+
"), crossProd)*dEdAngle"
+
cu
.
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross2 = cross(trim(delta"
+
deltaName2
+
"), crossProd)*dEdAngle"
+
cu
.
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"deltaCross0"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"deltaCross1"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"deltaCross2"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"}
\n
"
);
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName1
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName2
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName3
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real r = SQRT(delta"
+
deltaName2
+
".w);
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 ff;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
cu
.
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.y = (delta"
+
deltaName1
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName1
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName1
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.z = (delta"
+
deltaName3
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName3
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName3
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
cu
.
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"internalF0"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"s-internalF0"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"-s-internalF3"
);
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
3
],
"internalF3"
);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"}
\n
"
);
}
// Generate the kernels.
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_DONOR_FORCE"
]
=
computeDonor
.
str
();
replacements
[
"COMPUTE_ACCEPTOR_FORCE"
]
=
computeAcceptor
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"NUM_DONORS"
]
=
cu
.
intToString
(
numDonors
);
defines
[
"NUM_ACCEPTORS"
]
=
cu
.
intToString
(
numAcceptors
);
defines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
)
{
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
}
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
CutoffNonPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
force
.
getNumExclusions
()
>
0
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
customHbondForce
,
replacements
),
defines
);
donorKernel
=
cu
.
getKernel
(
module
,
"computeDonorForces"
);
acceptorKernel
=
cu
.
getKernel
(
module
,
"computeAcceptorForces"
);
}
/**
 * Launch the donor and acceptor kernels for this force.
 *
 * Global parameter values are re-read from the context on every call and re-uploaded
 * only when at least one of them has changed.  The kernel argument lists are built once,
 * on the first call that gets this far.
 *
 * @param context        the context whose global parameters are read
 * @param includeForces  not consulted by this implementation
 * @param includeEnergy  not consulted by this implementation
 * @return always 0.0; this function does not compute an energy on the host
 */
double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    // Nothing was allocated by initialize() in this case, so there is nothing to run.
    if (numDonors == 0 || numAcceptors == 0)
        return 0.0;
    if (globals != NULL) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
            if (value != globalParamValues[i])
                changed = true;
            globalParamValues[i] = value;
        }
        if (changed)
            globals->upload(globalParamValues);
    }
    if (!hasInitializedKernel) {
        hasInitializedKernel = true;

        // Arguments for the donor kernel.  After the fixed arguments come the optional
        // globals, the donor parameter buffers, the acceptor parameter buffers, and then
        // the tabulated function tables followed by their parameter array -- the same
        // order in which initialize() builds PARAMETER_ARGUMENTS.

        donorArgs.push_back(&cu.getForce().getDevicePointer());
        donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        donorArgs.push_back(&cu.getPosq().getDevicePointer());
        donorArgs.push_back(&donorExclusions->getDevicePointer());
        donorArgs.push_back(&donors->getDevicePointer());
        donorArgs.push_back(&acceptors->getDevicePointer());
        donorArgs.push_back(cu.getPeriodicBoxSizePointer());
        donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        if (globals != NULL)
            donorArgs.push_back(&globals->getDevicePointer());
        for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
            CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
            donorArgs.push_back(&buffer.getMemory());
        }
        for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
            CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
            donorArgs.push_back(&buffer.getMemory());
        }
        if (tabulatedFunctionParams != NULL) {
            for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
                donorArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
            donorArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
        }

        // Arguments for the acceptor kernel: identical, except that the fourth argument
        // is the acceptor exclusion array instead of the donor exclusion array.

        acceptorArgs.push_back(&cu.getForce().getDevicePointer());
        acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
        acceptorArgs.push_back(&acceptorExclusions->getDevicePointer());
        acceptorArgs.push_back(&donors->getDevicePointer());
        acceptorArgs.push_back(&acceptors->getDevicePointer());
        acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
        acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        if (globals != NULL)
            acceptorArgs.push_back(&globals->getDevicePointer());
        for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
            CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
            acceptorArgs.push_back(&buffer.getMemory());
        }
        for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
            CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
            acceptorArgs.push_back(&buffer.getMemory());
        }
        if (tabulatedFunctionParams != NULL) {
            for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
                acceptorArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
            acceptorArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
        }
    }

    // Both kernels are launched over max(numDonors, numAcceptors) work items and are
    // given room for three float4 values per thread of a block.

    int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
    cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
    cu.executeKernel(acceptorKernel, &acceptorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
    return 0.0;
}
/**
 * Re-read the per-donor and per-acceptor parameter values from a CustomHbondForce and
 * upload them to the device.
 *
 * Only parameter values are refreshed; the donor and acceptor counts must match those
 * seen by initialize().
 *
 * @param context  the context being updated (not consulted here)
 * @param force    the force to copy parameter values from
 * @throws OpenMMException if the number of donors handled by this context, or the
 *         number of acceptors, differs from the value recorded by initialize()
 */
void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
    int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
    int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
    if (numDonors != endIndex-startIndex)
        throw OpenMMException("updateParametersInContext: The number of donors has changed");
    if (numAcceptors != force.getNumAcceptors())
        throw OpenMMException("updateParametersInContext: The number of acceptors has changed");

    // initialize() returns before allocating the parameter sets when either count is
    // zero, so in that case there is nothing to update and the pointers must not be used.

    if (numDonors == 0 || numAcceptors == 0)
        return;

    // Record the per-donor parameters.

    vector<vector<float> > donorParamVector(numDonors);
    vector<double> parameters;
    for (int i = 0; i < numDonors; i++) {
        int d1, d2, d3;
        force.getDonorParameters(startIndex+i, d1, d2, d3, parameters);
        donorParamVector[i].resize(parameters.size());
        for (int j = 0; j < (int) parameters.size(); j++)
            donorParamVector[i][j] = (float) parameters[j];
    }
    donorParams->setParameterValues(donorParamVector);

    // Record the per-acceptor parameters.

    vector<vector<float> > acceptorParamVector(numAcceptors);
    for (int i = 0; i < numAcceptors; i++) {
        int a1, a2, a3;
        force.getAcceptorParameters(i, a1, a2, a3, parameters);
        acceptorParamVector[i].resize(parameters.size());
        for (int j = 0; j < (int) parameters.size(); j++)
            acceptorParamVector[i][j] = (float) parameters[j];
    }
    acceptorParams->setParameterValues(acceptorParamVector);

    // Mark that the current reordering may be invalid.

    cu.invalidateMolecules();
}
class
CudaCustomCompoundBondForceInfo
:
public
CudaForceInfo
{
public:
...
...
@@ -4288,679 +4255,784 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
// Update the time and step count.
double
dt
,
time
;
double
dt
,
time
;
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSize
)
time
=
maxTime
;
// Avoid round-off error
}
else
{
float2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSizeFloat
)
time
=
maxTime
;
// Avoid round-off error
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
/**
 * Free the device array holding the integrator parameters.  Deleting a null pointer
 * does nothing, so no explicit check is needed when it was never allocated.
 */
CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
    // Make this kernel's CUDA context current before freeing memory that lives in it.
    cu.setAsCurrent();
    delete params;
}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
selectSizeKernel
=
cu
.
getKernel
(
module
,
"selectLangevinStepSize"
);
params
=
CudaArray
::
create
<
float
>
(
cu
,
3
,
"langevinParams"
);
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
}
/**
 * Take one variable-size Langevin step.
 *
 * The sequence is: select the step size on the device, run the first integration
 * kernel, apply constraints, run the second integration kernel, compute virtual sites,
 * then download the chosen step size and advance the time and step count.
 *
 * @param context     the context being integrated (not consulted here)
 * @param integrator  supplies the error tolerance, friction, temperature and constraint tolerance
 * @param maxTime     the time that this step must not go past
 * @return the size of the step that was taken
 */
double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    const bool useDouble = cu.getUseDoublePrecision();
    const int atomCount = cu.getNumAtoms();

    // Select the step size to use.  Every scalar is kept in both precisions so that the
    // kernel can be handed a pointer to a value of the width it expects.

    double stepLimit = maxTime-cu.getTime();
    float stepLimitFloat = (float) stepLimit;
    double errorTol = integrator.getErrorTolerance();
    float errorTolFloat = (float) errorTol;
    const double friction = integrator.getFriction();
    double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
    float tauFloat = (float) tau;
    double kT = BOLTZ*integrator.getTemperature();
    float kTFloat = (float) kT;
    void* selectArgs[8];
    if (useDouble) {
        selectArgs[0] = (void*) &stepLimit;
        selectArgs[1] = (void*) &errorTol;
        selectArgs[2] = (void*) &tau;
        selectArgs[3] = (void*) &kT;
    }
    else {
        selectArgs[0] = (void*) &stepLimitFloat;
        selectArgs[1] = (void*) &errorTolFloat;
        selectArgs[2] = (void*) &tauFloat;
        selectArgs[3] = (void*) &kTFloat;
    }
    selectArgs[4] = &integration.getStepSize().getDevicePointer();
    selectArgs[5] = &cu.getVelm().getDevicePointer();
    selectArgs[6] = &cu.getForce().getDevicePointer();
    selectArgs[7] = &params->getDevicePointer();
    int sharedBytes = blockSize*(useDouble ? sizeof(double) : sizeof(float));
    cu.executeKernel(selectSizeKernel, selectArgs, blockSize, blockSize, sharedBytes);

    // Call the first integration kernel.

    int randomIndex = integration.prepareRandomNumbers(cu.getPaddedNumAtoms());
    void* part1Args[] = {
        &cu.getVelm().getDevicePointer(),
        &cu.getForce().getDevicePointer(),
        &integration.getPosDelta().getDevicePointer(),
        &params->getDevicePointer(),
        &integration.getStepSize().getDevicePointer(),
        &integration.getRandom().getDevicePointer(),
        &randomIndex
    };
    cu.executeKernel(kernel1, part1Args, atomCount);

    // Apply constraints.

    integration.applyConstraints(integrator.getConstraintTolerance());

    // Call the second integration kernel.

    void* part2Args[] = {
        &cu.getPosq().getDevicePointer(),
        &integration.getPosDelta().getDevicePointer(),
        &cu.getVelm().getDevicePointer(),
        &integration.getStepSize().getDevicePointer()
    };
    cu.executeKernel(kernel2, part2Args, atomCount);
    integration.computeVirtualSites();

    // Update the time and step count.  When the step that was taken equals the largest
    // allowed step, the time is set to exactly maxTime to avoid round-off error.

    double dt;
    bool tookFullStep;
    if (useDouble) {
        double2 chosen;
        integration.getStepSize().download(&chosen);
        dt = chosen.y;
        tookFullStep = (dt == stepLimit);
    }
    else {
        float2 chosen;
        integration.getStepSize().download(&chosen);
        dt = chosen.y;
        tookFullStep = (dt == stepLimitFloat);
    }
    const double newTime = (tookFullStep ? maxTime : cu.getTime()+dt);
    cu.setTime(newTime);
    cu.setStepCount(cu.getStepCount()+1);
    return dt;
}
/**
 * Reorder listener that keeps the per-DOF variables of a CustomIntegrator consistent
 * with the atom order reported by the CudaContext.
 *
 * The listener remembers the atom order it last saw.  When execute() is called it maps
 * the per-DOF values back through that remembered order, lays them out again according
 * to the order currently reported by the context, and uploads the result.
 */
class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener {
public:
    ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<float> >& localPerDofValuesFloat, vector<vector<double> >& localPerDofValuesDouble, bool& deviceValuesAreCurrent) :
            cu(cu), perDofValues(perDofValues), localPerDofValuesFloat(localPerDofValuesFloat), localPerDofValuesDouble(localPerDofValuesDouble), deviceValuesAreCurrent(deviceValuesAreCurrent) {
        // Snapshot the atom order in effect when the listener is created.
        const vector<int>& initialOrder = cu.getAtomIndex();
        lastAtomOrder.assign(initialOrder.begin(), initialOrder.begin()+cu.getNumAtoms());
    }
    void execute() {
        // Reorder the per-DOF variables to reflect the new atom order.

        if (perDofValues.getNumParameters() == 0)
            return;
        if (cu.getUseDoublePrecision())
            reorderValues(localPerDofValuesDouble);
        else
            reorderValues(localPerDofValuesFloat);
        const vector<int>& currentOrder = cu.getAtomIndex();
        const int numAtoms = cu.getNumAtoms();
        for (int atom = 0; atom < numAtoms; atom++)
            lastAtomOrder[atom] = currentOrder[atom];
        deviceValuesAreCurrent = true;
    }
private:
    /**
     * Permute one host-side copy of the per-DOF values (three consecutive entries per atom)
     * from the remembered atom order to the current one, then upload it.
     *
     * @param localValues  the host copy to permute; it is refreshed from perDofValues first
     *                     when deviceValuesAreCurrent is set
     */
    template <class ValueType>
    void reorderValues(vector<vector<ValueType> >& localValues) {
        const int numAtoms = cu.getNumAtoms();
        const vector<int>& currentOrder = cu.getAtomIndex();
        if (deviceValuesAreCurrent)
            perDofValues.getParameterValues(localValues);

        // First pass: index the values by the atom index recorded in lastAtomOrder.
        vector<vector<ValueType> > unscrambled(3*numAtoms);
        for (int atom = 0; atom < numAtoms; atom++)
            for (int axis = 0; axis < 3; axis++)
                unscrambled[3*lastAtomOrder[atom]+axis] = localValues[3*atom+axis];

        // Second pass: lay them out again following the current order.
        for (int atom = 0; atom < numAtoms; atom++)
            for (int axis = 0; axis < 3; axis++)
                localValues[3*atom+axis] = unscrambled[3*currentOrder[atom]+axis];
        perDofValues.setParameterValues(localValues);
    }
    CudaContext& cu;
    CudaParameterSet& perDofValues;
    vector<vector<float> >& localPerDofValuesFloat;
    vector<vector<double> >& localPerDofValuesDouble;
    bool& deviceValuesAreCurrent;
    vector<int> lastAtomOrder;
};
/**
 * Destroy the objects owned by this kernel.  The kernel's CUDA context is made current
 * first, before any of them is deleted.
 */
CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
    cu.setAsCurrent();

    // Deleting a null pointer is a no-op, so members that were never allocated need no check.
    delete globalValues;
    delete contextParameterValues;
    delete sumBuffer;
    delete energy;
    delete uniformRandoms;
    delete randomSeed;
    delete perDofValues;
}
void
CudaIntegrateCustomStepKernel
::
initialize
(
const
System
&
system
,
const
CustomIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
sumBuffer
=
new
CudaArray
(
cu
,
3
*
system
.
getNumParticles
(),
elementSize
,
"sumBuffer"
);
energy
=
new
CudaArray
(
cu
,
1
,
elementSize
,
"energy"
);
perDofValues
=
new
CudaParameterSet
(
cu
,
integrator
.
getNumPerDofVariables
(),
3
*
system
.
getNumParticles
(),
"perDofVariables"
,
false
,
cu
.
getUseDoublePrecision
());
cu
.
addReorderListener
(
new
ReorderListener
(
cu
,
*
perDofValues
,
localPerDofValuesFloat
,
localPerDofValuesDouble
,
deviceValuesAreCurrent
));
prevStepSize
=
-
1.0
;
SimTKOpenMMUtilities
::
setRandomNumberSeed
(
integrator
.
getRandomNumberSeed
());
}
/**
 * Generate the source code that evaluates an expression and stores it in a global quantity.
 *
 * The target may be "dt", one of the integrator's global variables, or one of the context
 * parameters listed in parameterNames.  Assigning to a context parameter also sets
 * modifiesParameters.
 *
 * @param variable    name of the quantity being assigned
 * @param expr        the parsed expression to evaluate
 * @param integrator  the integrator whose global variables may be referenced
 * @param energyName  the name under which the energy appears in the expression
 * @return the generated code, as produced by the context's expression utilities
 * @throws OpenMMException if variable matches none of the known targets
 */
string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
    const bool assignsStepSize = (variable == "dt");
    const int numGlobals = integrator.getNumGlobalVariables();
    const int numParams = (int) parameterNames.size();

    // "variables" maps each name usable inside an expression to the code that reads it;
    // "expressions" maps the assignment prefix of the target to the expression itself.
    // Entries are added in a fixed order, so a later name replaces an earlier identical one.
    map<string, string> variables;
    map<string, Lepton::ParsedExpression> expressions;
    variables["dt"] = "dt[0].y";
    variables["uniform"] = "uniform";
    variables["gaussian"] = "gaussian";
    variables[energyName] = "energy[0]";
    if (assignsStepSize)
        expressions["dt[0].y = "] = expr;
    for (int index = 0; index < numGlobals; index++) {
        const string name = integrator.getGlobalVariableName(index);
        const string reference = "globals["+cu.intToString(index)+"]";
        variables[name] = reference;
        if (!assignsStepSize && variable == name)
            expressions[reference+" = "] = expr;
    }
    for (int index = 0; index < numParams; index++) {
        const string reference = "params["+cu.intToString(index)+"]";
        variables[parameterNames[index]] = reference;
        if (!assignsStepSize && variable == parameterNames[index]) {
            expressions[reference+" = "] = expr;
            modifiesParameters = true;
        }
    }
    if (expressions.empty())
        throw OpenMMException("Unknown global variable: "+variable);
    vector<pair<string, string> > functions;
    return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
}
/**
 * Generate the source code that evaluates an expression for one Cartesian component of a
 * per-DOF quantity and stores the result.
 *
 * The target may be "x" (position), "v" (velocity), the empty string (the sum buffer
 * entry for this component), or one of the integrator's per-DOF variables.
 *
 * @param variable    name of the quantity being assigned; empty to write into the sum buffer
 * @param expr        the parsed expression to evaluate
 * @param component   which component to generate code for: 0, 1 or 2 for x, y or z
 * @param integrator  the integrator whose global and per-DOF variables may be referenced
 * @param forceName   the name under which the force appears in the expression
 * @param energyName  the name under which the energy appears in the expression
 * @return the generated code, as produced by the context's expression utilities
 * @throws OpenMMException if variable matches none of the known targets
 */
string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
    const char* const axisNames[] = {"x", "y", "z"};
    const string axis = axisNames[component];
    const string suffix = "."+axis;
    const string perDofPrefix = "perDof"+axis;
    const int numPerDof = integrator.getNumPerDofVariables();

    // Work out what the expression is assigned to.
    map<string, Lepton::ParsedExpression> expressions;
    if (variable == "x" || variable == "v")
        expressions[(variable == "x" ? "position" : "velocity")+suffix+" = "] = expr;
    else if (variable.empty())
        expressions["sum[3*index+"+cu.intToString(component)+"] = "] = expr;
    else {
        for (int index = 0; index < numPerDof; index++) {
            if (variable != integrator.getPerDofVariableName(index))
                continue;
            expressions[perDofPrefix+perDofValues->getParameterSuffix(index)+" = "] = expr;
        }
    }
    if (expressions.empty())
        throw OpenMMException("Unknown per-DOF variable: "+variable);

    // Map every name usable inside the expression to the code that reads it.  Entries are
    // added in a fixed order, so a later name replaces an earlier identical one.
    map<string, string> variables;
    variables["x"] = "position"+suffix;
    variables["v"] = "velocity"+suffix;
    variables[forceName] = "f"+suffix;
    variables["gaussian"] = "gaussian"+suffix;
    variables["uniform"] = "uniform"+suffix;
    variables["m"] = "mass";
    variables["dt"] = "stepSize";
    variables[energyName] = "energy[0]";
    const int numGlobals = integrator.getNumGlobalVariables();
    for (int index = 0; index < numGlobals; index++)
        variables[integrator.getGlobalVariableName(index)] = "globals["+cu.intToString(index)+"]";
    for (int index = 0; index < numPerDof; index++)
        variables[integrator.getPerDofVariableName(index)] = perDofPrefix+perDofValues->getParameterSuffix(index);
    const int numParams = (int) parameterNames.size();
    for (int index = 0; index < numParams; index++)
        variables[parameterNames[index]] = "params["+cu.intToString(index)+"]";

    // The temporaries get a prefix that includes the component index.
    vector<pair<string, string> > functions;
    const string tempPrefix = "temp"+cu.intToString(component)+"_";
    return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, tempPrefix, "", "double");
}
/**
 * Execute one time step of a CustomIntegrator.
 *
 * On the first call the kernels for all computation steps are generated and compiled:
 * consecutive ComputeGlobal steps, and consecutive ComputePerDof steps that need neither
 * forces, energy nor uniform random numbers, are merged into a single kernel.  On every
 * call the host-side per-DOF values, step size and context parameters are synchronized
 * to the device, the computation steps are run in order (recomputing forces and energy
 * only when a step needs them and they are not currently valid), and finally the time
 * and step count are advanced by the integrator's fixed step size.
 *
 * @param context         the context being integrated
 * @param integrator      the CustomIntegrator defining the computation steps
 * @param forcesAreValid  on entry, whether the current forces are valid; updated as steps
 *                        compute or invalidate them
 * @throws OpenMMException if a step depends on more than one force group, or assigns to
 *                         an unknown variable
 */
void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
    int numAtoms = cu.getNumAtoms();
    int numSteps = integrator.getNumComputations();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;

        // Initialize various data structures.

        const map<string, double>& params = context.getParameters();
        if (cu.getUseDoublePrecision()) {
            contextParameterValues = CudaArray::create<double>(cu, max(1, (int) params.size()), "contextParameters");
            contextValuesDouble.resize(contextParameterValues->getSize());
            for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
                contextValuesDouble[parameterNames.size()] = iter->second;
                parameterNames.push_back(iter->first);
            }
            contextParameterValues->upload(contextValuesDouble);
        }
        else {
            contextParameterValues = CudaArray::create<float>(cu, max(1, (int) params.size()), "contextParameters");
            contextValuesFloat.resize(contextParameterValues->getSize());
            for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
                contextValuesFloat[parameterNames.size()] = (float) iter->second;
                parameterNames.push_back(iter->first);
            }
            contextParameterValues->upload(contextValuesFloat);
        }
        kernels.resize(integrator.getNumComputations());
        kernelArgs.resize(integrator.getNumComputations());
        requiredGaussian.resize(integrator.getNumComputations(), 0);
        requiredUniform.resize(integrator.getNumComputations(), 0);
        needsForces.resize(numSteps, false);
        needsEnergy.resize(numSteps, false);
        forceGroup.resize(numSteps, -2); // -2 means "no force group assigned yet"
        invalidatesForces.resize(numSteps, false);
        merged.resize(numSteps, false);
        modifiesParameters = false;
        map<string, string> defines;
        defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
        defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
        defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
        defines["SUM_BUFFER_SIZE"] = "0";
        defines["SUM_OUTPUT_INDEX"] = "0";

        // Initialize the random number generator.

        uniformRandoms = CudaArray::create<float4>(cu, cu.getNumAtoms(), "uniformRandoms");
        randomSeed = CudaArray::create<int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
        vector<int4> seed(randomSeed->getSize());
        unsigned int r = integrator.getRandomNumberSeed()+1;
        for (int i = 0; i < randomSeed->getSize(); i++) {
            seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
            seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
            seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
            seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
        }
        randomSeed->upload(seed);
        CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
        randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");

        // Build a list of all variables that affect the forces, so we can tell which
        // steps invalidate them.

        set<string> affectsForce;
        affectsForce.insert("x");
        for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) {
            const map<string, double> params = (*iter)->getDefaultParameters();
            for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param)
                affectsForce.insert(param->first);
        }

        // Record information about all the computation steps.

        stepType.resize(numSteps);
        vector<string> variable(numSteps);
        vector<Lepton::ParsedExpression> expression(numSteps);
        vector<string> forceGroupName;
        vector<string> energyGroupName;
        for (int i = 0; i < 32; i++) {
            stringstream fname;
            fname << "f" << i;
            forceGroupName.push_back(fname.str());
            stringstream ename;
            ename << "energy" << i;
            energyGroupName.push_back(ename.str());
        }
        vector<string> forceName(numSteps, "f");
        vector<string> energyName(numSteps, "energy");
        for (int step = 0; step < numSteps; step++) {
            string expr;
            integrator.getComputationStep(step, stepType[step], variable[step], expr);
            if (expr.size() > 0) {
                expression[step] = Lepton::Parser::parse(expr).optimize();
                if (usesVariable(expression[step], "f")) {
                    needsForces[step] = true;
                    forceGroup[step] = -1;
                }
                if (usesVariable(expression[step], "energy")) {
                    needsEnergy[step] = true;
                    forceGroup[step] = -1;
                }
                for (int i = 0; i < 32; i++) {
                    if (usesVariable(expression[step], forceGroupName[i])) {
                        if (forceGroup[step] != -2)
                            throw OpenMMException("A single computation step cannot depend on multiple force groups");
                        needsForces[step] = true;
                        forceGroup[step] = 1<<i;
                        forceName[step] = forceGroupName[i];
                    }
                    if (usesVariable(expression[step], energyGroupName[i])) {
                        if (forceGroup[step] != -2)
                            throw OpenMMException("A single computation step cannot depend on multiple force groups");
                        needsEnergy[step] = true;
                        forceGroup[step] = 1<<i;
                        energyName[step] = energyGroupName[i];
                    }
                }
            }
            invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());

            // A step that uses no forces or energy inherits the group of the preceding step.
            if (forceGroup[step] == -2 && step > 0)
                forceGroup[step] = forceGroup[step-1];
        }

        // Determine how each step will represent the position (as just a value, or a value plus a delta).

        vector<bool> storePosAsDelta(numSteps, false);
        vector<bool> loadPosAsDelta(numSteps, false);
        bool beforeConstrain = false;
        for (int step = numSteps-1; step >= 0; step--) {
            if (stepType[step] == CustomIntegrator::ConstrainPositions)
                beforeConstrain = true;
            else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
                storePosAsDelta[step] = true;
        }
        bool storedAsDelta = false;
        for (int step = 0; step < numSteps; step++) {
            loadPosAsDelta[step] = storedAsDelta;
            if (storePosAsDelta[step] == true)
                storedAsDelta = true;
            if (stepType[step] == CustomIntegrator::ConstrainPositions)
                storedAsDelta = false;
        }

        // Identify steps that can be merged into a single kernel.

        for (int step = 1; step < numSteps; step++) {
            if (needsForces[step] || needsEnergy[step])
                continue;
            if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal)
                merged[step] = true;
            if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof && !usesVariable(expression[step], "uniform"))
                merged[step] = true;
        }

        // Loop over all steps and create the kernels for them.

        for (int step = 0; step < numSteps; step++) {
            if ((stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) && !merged[step]) {
                // Compute a per-DOF value.

                stringstream compute;
                for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
                    CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
                    compute << buffer.getType()<<" perDofx"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index];\n";
                    compute << buffer.getType()<<" perDofy"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+1];\n";
                    compute << buffer.getType()<<" perDofz"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+2];\n";
                }
                int numGaussian = 0, numUniform = 0;

                // Emit this step followed by every step that was merged into it.
                for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
                    compute << "{\n";
                    for (int i = 0; i < 3; i++)
                        compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
                    if (variable[j] == "x") {
                        if (storePosAsDelta[j])
                            compute << "posDelta[index] = convertFromDouble4(position-convertToDouble4(posq[index]));\n";
                        else
                            compute << "posq[index] = convertFromDouble4(position);\n";
                    }
                    else if (variable[j] == "v")
                        compute << "velm[index] = convertFromDouble4(velocity);\n";
                    else {
                        for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
                            compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index] = perDofx"<<cu.intToString(i+1)<<";\n";
                            compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+1] = perDofy"<<cu.intToString(i+1)<<";\n";
                            compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
                        }
                    }
                    compute << "}\n";
                    numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
                    numUniform += numAtoms*usesVariable(expression[j], "uniform");
                }
                map<string, string> replacements;
                replacements["COMPUTE_STEP"] = compute.str();
                stringstream args;
                for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
                    CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
                    string valueName = "perDofValues"+cu.intToString(i+1);
                    args << ", " << buffer.getType() << "* __restrict__ " << valueName;
                }
                replacements["PARAMETER_ARGUMENTS"] = args.str();
                if (loadPosAsDelta[step])
                    defines["LOAD_POS_AS_DELTA"] = "1";
                else if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
                    defines.erase("LOAD_POS_AS_DELTA");
                CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customIntegratorPerDof, replacements), defines);
                CUfunction kernel = cu.getKernel(module, "computePerDof");
                kernels[step].push_back(kernel);
                requiredGaussian[step] = numGaussian;
                requiredUniform[step] = numUniform;
                vector<void*> args1;
                args1.push_back(&cu.getPosq().getDevicePointer());
                args1.push_back(&integration.getPosDelta().getDevicePointer());
                args1.push_back(&cu.getVelm().getDevicePointer());
                args1.push_back(&cu.getForce().getDevicePointer());
                args1.push_back(&integration.getStepSize().getDevicePointer());
                args1.push_back(&globalValues->getDevicePointer());
                args1.push_back(&contextParameterValues->getDevicePointer());
                args1.push_back(&sumBuffer->getDevicePointer());
                args1.push_back(&integration.getRandom().getDevicePointer());
                args1.push_back(NULL); // slot 9: the random number index, filled in at launch time
                args1.push_back(&uniformRandoms->getDevicePointer());
                args1.push_back(&energy->getDevicePointer());
                for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
                    args1.push_back(&perDofValues->getBuffers()[i].getMemory());
                kernelArgs[step].push_back(args1);
                if (stepType[step] == CustomIntegrator::ComputeSum) {
                    // Create a second kernel for this step that sums the values.

                    vector<void*> args2;
                    args2.push_back(&sumBuffer->getDevicePointer());
                    bool found = false;
                    for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
                        if (variable[step] == integrator.getGlobalVariableName(j)) {
                            args2.push_back(&globalValues->getDevicePointer());
                            defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
                            found = true;
                        }
                    for (int j = 0; j < (int) parameterNames.size() && !found; j++)
                        if (variable[step] == parameterNames[j]) {
                            args2.push_back(&contextParameterValues->getDevicePointer());
                            defines["SUM_OUTPUT_INDEX"] = cu.intToString(j);
                            found = true;
                            modifiesParameters = true;
                        }
                    if (!found)
                        throw OpenMMException("Unknown global variable: "+variable[step]);
                    defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
                    module = cu.createModule(CudaKernelSources::customIntegrator, defines);
                    kernel = cu.getKernel(module, "computeSum");
                    kernels[step].push_back(kernel);
                    kernelArgs[step].push_back(args2);
                }
            }
            else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
                // Compute a global value.

                stringstream compute;
                for (int i = step; i < numSteps && (i == step || merged[i]); i++)
                    compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
                map<string, string> replacements;
                replacements["COMPUTE_STEP"] = compute.str();
                CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
                CUfunction kernel = cu.getKernel(module, "computeGlobal");
                kernels[step].push_back(kernel);
                vector<void*> args;
                args.push_back(&integration.getStepSize().getDevicePointer());
                args.push_back(&globalValues->getDevicePointer());
                args.push_back(&contextParameterValues->getDevicePointer());
                args.push_back(NULL); // slot 3: the uniform random value, filled in at launch time
                args.push_back(NULL); // slot 4: the gaussian random value, filled in at launch time
                args.push_back(&energy->getDevicePointer());
                kernelArgs[step].push_back(args);
            }
            else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
                // Apply position constraints.

                CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
                CUfunction kernel = cu.getKernel(module, "applyPositionDeltas");
                kernels[step].push_back(kernel);
                vector<void*> args;
                args.push_back(&cu.getPosq().getDevicePointer());
                args.push_back(&integration.getPosDelta().getDevicePointer());
                kernelArgs[step].push_back(args);
            }
        }

        // Create the kernel for summing energy.

        defines["SUM_OUTPUT_INDEX"] = "0";
        defines["SUM_BUFFER_SIZE"] = cu.intToString(cu.getEnergyBuffer().getSize());
        CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
        sumEnergyKernel = cu.getKernel(module, "computeSum");
    }

    // Make sure all values (variables, parameters, etc.) stored on the device are up to date.

    if (!deviceValuesAreCurrent) {
        if (cu.getUseDoublePrecision())
            perDofValues->setParameterValues(localPerDofValuesDouble);
        else
            perDofValues->setParameterValues(localPerDofValuesFloat);
        deviceValuesAreCurrent = true;
    }
    localValuesAreCurrent = false;
    double stepSize = integrator.getStepSize();
    if (stepSize != prevStepSize) {
        if (cu.getUseDoublePrecision()) {
            double size[] = {0, stepSize};
            integration.getStepSize().upload(size);
        }
        else {
            float size[] = {0, (float) stepSize};
            integration.getStepSize().upload(size);
        }
        prevStepSize = stepSize;
    }

    // Upload the context parameters only if one of them differs from the cached copy.
    bool paramsChanged = false;
    if (cu.getUseDoublePrecision()) {
        for (int i = 0; i < (int) parameterNames.size(); i++) {
            double value = context.getParameter(parameterNames[i]);
            if (value != contextValuesDouble[i]) {
                contextValuesDouble[i] = value;
                paramsChanged = true;
            }
        }
        if (paramsChanged)
            contextParameterValues->upload(contextValuesDouble);
    }
    else {
        for (int i = 0; i < (int) parameterNames.size(); i++) {
            float value = (float) context.getParameter(parameterNames[i]);
            if (value != contextValuesFloat[i]) {
                contextValuesFloat[i] = value;
                paramsChanged = true;
            }
        }
        if (paramsChanged)
            contextParameterValues->upload(contextValuesFloat);
    }

    // Loop over computation steps in the integrator and execute them.

    void* randomArgs[] = {&uniformRandoms->getDevicePointer(), &randomSeed->getDevicePointer()};
    for (int i = 0; i < numSteps; i++) {
        if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || context.getLastForceGroups() != forceGroup[i])) {
            // Recompute forces and/or energy.  Figure out what is actually needed
            // between now and the next time they get invalidated again.

            bool computeForce = false, computeEnergy = false;
            for (int j = i; ; j++) {
                if (needsForces[j])
                    computeForce = true;
                if (needsEnergy[j])
                    computeEnergy = true;
                if (invalidatesForces[j])
                    break;
                if (j == numSteps-1)
                    j = -1; // wrap around to the first step (the loop increment makes it 0)
                if (j == i-1)
                    break;
            }
            recordChangedParameters(context);
            context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
            if (computeEnergy) {
                void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &energy->getDevicePointer()};
                cu.executeKernel(sumEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
            }
            forcesAreValid = true;
        }
        if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
            int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
            kernelArgs[i][0][9] = &randomIndex;
            if (requiredUniform[i] > 0)
                cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
            cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
        }
        else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
            float uniform = SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber();
            float gauss = SimTKOpenMMUtilities::getNormallyDistributedRandomNumber();
            kernelArgs[i][0][3] = &uniform;
            kernelArgs[i][0][4] = &gauss;
            cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], 1, 1);
        }
        else if (stepType[i] == CustomIntegrator::ComputeSum) {
            int randomIndex = integration.prepareRandomNumbers(requiredGaussian[i]);
            kernelArgs[i][0][9] = &randomIndex;
            if (requiredUniform[i] > 0)
                cu.executeKernel(randomKernel, &randomArgs[0], numAtoms);
            cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
            cu.executeKernel(kernels[i][1], &kernelArgs[i][1][0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
        }
        else if (stepType[i] == CustomIntegrator::UpdateContextState) {
            recordChangedParameters(context);
            context.updateContextState();
        }
        else if (stepType[i] == CustomIntegrator::ConstrainPositions) {
            cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
            cu.executeKernel(kernels[i][0], &kernelArgs[i][0][0], numAtoms);
            cu.getIntegrationUtilities().computeVirtualSites();
        }
        else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
            cu.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
        }
        if (invalidatesForces[i])
            forcesAreValid = false;
    }
    recordChangedParameters(context);

    // Update the time and step count.

    cu.setTime(cu.getTime()+stepSize);
    cu.setStepCount(cu.getStepCount()+1);
}
CudaIntegrateVariableLangevinStepKernel
::~
CudaIntegrateVariableLangevinStepKernel
()
{
cu
.
setAsCurrent
();
if
(
params
!=
NULL
)
delete
params
;
/**
 * Copy context parameter values from the device back into the context.
 *
 * Nothing is done unless modifiesParameters is set.  Otherwise the device array is
 * downloaded and every parameter whose downloaded value differs from the value held by
 * the context is written back with setParameter().
 *
 * @param context  the context whose parameters should be brought up to date
 */
void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
    if (!modifiesParameters)
        return;
    const int numParameters = (int) parameterNames.size();
    if (cu.getUseDoublePrecision()) {
        contextParameterValues->download(contextValuesDouble);
        for (int index = 0; index < numParameters; index++) {
            const double currentValue = context.getParameter(parameterNames[index]);
            if (currentValue != contextValuesDouble[index])
                context.setParameter(parameterNames[index], contextValuesDouble[index]);
        }
        return;
    }
    contextParameterValues->download(contextValuesFloat);
    for (int index = 0; index < numParameters; index++) {
        // The context's value is narrowed to float before comparing, so both sides have the same precision.
        const float currentValue = (float) context.getParameter(parameterNames[index]);
        if (currentValue != contextValuesFloat[index])
            context.setParameter(parameterNames[index], contextValuesFloat[index]);
    }
}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
selectSizeKernel
=
cu
.
getKernel
(
module
,
"selectLangevinStepSize"
);
params
=
CudaArray
::
create
<
float
>
(
cu
,
3
,
"langevinParams"
);
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
/**
 * Retrieve the current values of the integrator's global variables from the device.
 *
 * @param context  the context being integrated (not referenced by this method)
 * @param values   on exit, holds numGlobalVariables values
 */
void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
    values.resize(numGlobalVariables);
    if (numGlobalVariables == 0)
        return;
    if (cu.getUseDoublePrecision()) {
        globalValues->download(values);
        return;
    }

    // In single precision the values are downloaded as floats and then widened to double.
    vector<float> singlePrecisionValues;
    globalValues->download(singlePrecisionValues);
    values.assign(singlePrecisionValues.begin(), singlePrecisionValues.begin()+numGlobalVariables);
}
double
CudaIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
// Select the step size to use.
double
maxStepSize
=
maxTime
-
cu
.
getTime
();
float
maxStepSizeFloat
=
(
float
)
maxStepSize
;
double
tol
=
integrator
.
getErrorTolerance
();
float
tolFloat
=
(
float
)
tol
;
double
tau
=
integrator
.
getFriction
()
==
0.0
?
0.0
:
1.0
/
integrator
.
getFriction
();
float
tauFloat
=
(
float
)
tau
;
double
kT
=
BOLTZ
*
integrator
.
getTemperature
();
float
kTFloat
=
(
float
)
kT
;
void
*
argsSelect
[]
=
{
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tau
:
(
void
*
)
&
tauFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTFloat
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
params
->
getDevicePointer
()};
int
sharedSize
=
blockSize
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
cu
.
executeKernel
(
selectSizeKernel
,
argsSelect
,
blockSize
,
blockSize
,
sharedSize
);
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
// Call the second integration kernel.
void
*
args2
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
// Update the time and step count.
/**
 * Upload new values for the integrator's global variables to the device.
 *
 * @param context  the context this kernel belongs to (not used by this method)
 * @param values   the values to store; the first numGlobalVariables entries are read
 *                 in single precision mode, the whole vector is uploaded in double
 *                 precision mode
 */
void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
    if (numGlobalVariables == 0)
        return; // Nothing to transfer.
    if (!cu.getUseDoublePrecision()) {
        // The device array holds floats: narrow each value into a staging vector
        // and upload that instead of the caller's doubles.
        vector<float> singlePrecision(numGlobalVariables);
        for (int index = 0; index < numGlobalVariables; ++index)
            singlePrecision[index] = (float) values[index];
        globalValues->upload(singlePrecision);
        return;
    }

    // The device array holds doubles, so the caller's vector can be uploaded as is.
    globalValues->upload(values);
}
double
dt
,
time
;
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
values
.
resize
(
perDofValues
->
getNumObjects
()
/
3
);
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSize
)
time
=
maxTime
;
// Avoid round-off error
if
(
!
localValuesAreCurrent
)
{
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
localValuesAreCurrent
=
true
;
}
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
values
[
order
[
i
]][
j
]
=
localPerDofValuesDouble
[
3
*
i
+
j
][
variable
];
}
else
{
float2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSizeFloat
)
time
=
maxTime
;
// Avoid round-off error
if
(
!
localValuesAreCurrent
)
{
perDofValues
->
getParameterValues
(
localPerDofValuesFloat
);
localValuesAreCurrent
=
true
;
}
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
values
[
order
[
i
]][
j
]
=
localPerDofValuesFloat
[
3
*
i
+
j
][
variable
];
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
//class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener {
//public:
// ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValues, bool& deviceValuesAreCurrent) :
// cu(cu), perDofValues(perDofValues), localPerDofValues(localPerDofValues), deviceValuesAreCurrent(deviceValuesAreCurrent) {
// int numAtoms = cu.getNumAtoms();
// lastAtomOrder.resize(numAtoms);
// for (int i = 0; i < numAtoms; i++)
// lastAtomOrder[i] = cu.getAtomIndex()[i];
// }
// void execute() {
// // Reorder the per-DOF variables to reflect the new atom order.
//
// if (perDofValues.getNumParameters() == 0)
// return;
// int numAtoms = cu.getNumAtoms();
// if (deviceValuesAreCurrent)
// perDofValues.getParameterValues(localPerDofValues);
// vector<vector<cl_float> > swap(3*numAtoms);
// for (int i = 0; i < numAtoms; i++) {
// swap[3*lastAtomOrder[i]] = localPerDofValues[3*i];
// swap[3*lastAtomOrder[i]+1] = localPerDofValues[3*i+1];
// swap[3*lastAtomOrder[i]+2] = localPerDofValues[3*i+2];
// }
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < numAtoms; i++) {
// localPerDofValues[3*i] = swap[3*order[i]];
// localPerDofValues[3*i+1] = swap[3*order[i]+1];
// localPerDofValues[3*i+2] = swap[3*order[i]+2];
// }
// perDofValues.setParameterValues(localPerDofValues);
// for (int i = 0; i < numAtoms; i++)
// lastAtomOrder[i] = order[i];
// deviceValuesAreCurrent = true;
// }
//private:
// CudaContext& cu;
// CudaParameterSet& perDofValues;
// vector<vector<cl_float> >& localPerDofValues;
// bool& deviceValuesAreCurrent;
// vector<int> lastAtomOrder;
//};
//
//CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
// cu.setAsCurrent();
// if (globalValues != NULL)
// delete globalValues;
// if (contextParameterValues != NULL)
// delete contextParameterValues;
// if (sumBuffer != NULL)
// delete sumBuffer;
// if (energy != NULL)
// delete energy;
// if (uniformRandoms != NULL)
// delete uniformRandoms;
// if (randomSeed != NULL)
// delete randomSeed;
// if (perDofValues != NULL)
// delete perDofValues;
//}
//
//void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
// cu.setAsCurrent();
// cu.getPlatformData().initializeContexts(system);
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// numGlobalVariables = integrator.getNumGlobalVariables();
// globalValues = new CudaArray<cl_float>(cu, max(1, numGlobalVariables), "globalVariables", true);
// sumBuffer = new CudaArray<cl_float>(cu, 3*system.getNumParticles(), "sumBuffer");
// energy = new CudaArray<cl_float>(cu, 1, "energy");
// perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables");
// cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValues, deviceValuesAreCurrent));
// prevStepSize = -1.0;
// SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
//}
//
//string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "dt")
// expressions["dt[0].y = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// if (variable == integrator.getGlobalVariableName(i))
// expressions["globals["+cu.intToString(i)+"] = "] = expr;
// for (int i = 0; i < (int) parameterNames.size(); i++)
// if (variable == parameterNames[i]) {
// expressions["params["+cu.intToString(i)+"] = "] = expr;
// modifiesParameters = true;
// }
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown global variable: "+variable);
// map<string, string> variables;
// variables["dt"] = "dt[0].y";
// variables["uniform"] = "uniform";
// variables["gaussian"] = "gaussian";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
//}
//
//string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
// const string suffixes[] = {".x", ".y", ".z"};
// string suffix = suffixes[component];
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "x")
// expressions["position"+suffix+" = "] = expr;
// else if (variable == "v")
// expressions["velocity"+suffix+" = "] = expr;
// else if (variable == "")
// expressions["sum[3*index+"+cu.intToString(component)+"] = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// if (variable == integrator.getPerDofVariableName(i))
// expressions["perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i)+" = "] = expr;
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown per-DOF variable: "+variable);
// map<string, string> variables;
// variables["x"] = "position"+suffix;
// variables["v"] = "velocity"+suffix;
// variables[forceName] = "f"+suffix;
// variables["gaussian"] = "gaussian"+suffix;
// variables["uniform"] = "uniform"+suffix;
// variables["m"] = "mass";
// variables["dt"] = "stepSize";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// string tempType = (cu.getSupportsDoublePrecision() ? "double" : "float");
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp"+cu.intToString(component)+"_", "", tempType);
//}
//
//void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// int numAtoms = cu.getNumAtoms();
// int numSteps = integrator.getNumComputations();
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
//
// // Initialize various data structures.
//
// const map<string, double>& params = context.getParameters();
// contextParameterValues = new CudaArray<cl_float>(cu, max(1, (int) params.size()), "contextParameters", true);
// for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
// contextParameterValues->set(parameterNames.size(), (float) iter->second);
// parameterNames.push_back(iter->first);
// }
// contextParameterValues->upload();
// kernels.resize(integrator.getNumComputations());
// requiredGaussian.resize(integrator.getNumComputations(), 0);
// requiredUniform.resize(integrator.getNumComputations(), 0);
// needsForces.resize(numSteps, false);
// needsEnergy.resize(numSteps, false);
// forceGroup.resize(numSteps, -2);
// invalidatesForces.resize(numSteps, false);
// merged.resize(numSteps, false);
// modifiesParameters = false;
// map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
//
// // Initialize the random number generator.
//
// uniformRandoms = new CudaArray<mm_float4>(cu, cu.getNumAtoms(), "uniformRandoms");
// randomSeed = new CudaArray<mm_int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
// vector<mm_int4> seed(randomSeed->getSize());
// unsigned int r = integrator.getRandomNumberSeed()+1;
// for (int i = 0; i < randomSeed->getSize(); i++) {
// seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// }
// randomSeed->upload(seed);
// CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
// randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// randomKernel.setArg<cu::Buffer>(0, uniformRandoms->getDevicePointer());
// randomKernel.setArg<cu::Buffer>(1, randomSeed->getDevicePointer());
//
// // Build a list of all variables that affect the forces, so we can tell which
// // steps invalidate them.
//
// set<string> affectsForce;
// affectsForce.insert("x");
// for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) {
// const map<string, double> params = (*iter)->getDefaultParameters();
// for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param)
// affectsForce.insert(param->first);
// }
//
// // Record information about all the computation steps.
//
// stepType.resize(numSteps);
// vector<string> variable(numSteps);
// vector<Lepton::ParsedExpression> expression(numSteps);
// vector<string> forceGroupName;
// vector<string> energyGroupName;
// for (int i = 0; i < 32; i++) {
// stringstream fname;
// fname << "f" << i;
// forceGroupName.push_back(fname.str());
// stringstream ename;
// ename << "energy" << i;
// energyGroupName.push_back(ename.str());
// }
// vector<string> forceName(numSteps, "f");
// vector<string> energyName(numSteps, "energy");
// for (int step = 0; step < numSteps; step++) {
// string expr;
// integrator.getComputationStep(step, stepType[step], variable[step], expr);
// if (expr.size() > 0) {
// expression[step] = Lepton::Parser::parse(expr).optimize();
// if (usesVariable(expression[step], "f")) {
// needsForces[step] = true;
// forceGroup[step] = -1;
// }
// if (usesVariable(expression[step], "energy")) {
// needsEnergy[step] = true;
// forceGroup[step] = -1;
// }
// for (int i = 0; i < 32; i++) {
// if (usesVariable(expression[step], forceGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsForces[step] = true;
// forceGroup[step] = 1<<i;
// forceName[step] = forceGroupName[i];
// }
// if (usesVariable(expression[step], energyGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsEnergy[step] = true;
// forceGroup[step] = 1<<i;
// energyName[step] = energyGroupName[i];
// }
// }
// }
// invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
// if (forceGroup[step] == -2 && step > 0)
// forceGroup[step] = forceGroup[step-1];
// }
//
// // Determine how each step will represent the position (as just a value, or a value plus a delta).
//
// vector<bool> storePosAsDelta(numSteps, false);
// vector<bool> loadPosAsDelta(numSteps, false);
// bool beforeConstrain = false;
// for (int step = numSteps-1; step >= 0; step--) {
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// beforeConstrain = true;
// else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
// storePosAsDelta[step] = true;
// }
// bool storedAsDelta = false;
// for (int step = 0; step < numSteps; step++) {
// loadPosAsDelta[step] = storedAsDelta;
// if (storePosAsDelta[step] == true)
// storedAsDelta = true;
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// storedAsDelta = false;
// }
//
// // Identify steps that can be merged into a single kernel.
//
// for (int step = 1; step < numSteps; step++) {
// if (needsForces[step] || needsEnergy[step])
// continue;
// if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal)
// merged[step] = true;
// if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof &&
// !usesVariable(expression[step], "uniform"))
// merged[step] = true;
// }
//
// // Loop over all steps and create the kernels for them.
//
// for (int step = 0; step < numSteps; step++) {
// if ((stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) && !merged[step]) {
// // Compute a per-DOF value.
//
// stringstream compute;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << buffer.getType()<<" perDofx"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index];\n";
// compute << buffer.getType()<<" perDofy"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+1];\n";
// compute << buffer.getType()<<" perDofz"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+2];\n";
// }
// string convert = (cu.getSupportsDoublePrecision() ? "convert_float4(" : "(");
// int numGaussian = 0, numUniform = 0;
// for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
// compute << "{\n";
// for (int i = 0; i < 3; i++)
// compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
// if (variable[j] == "x") {
// if (storePosAsDelta[j]) {
// if (cu.getSupportsDoublePrecision())
// compute << "posDelta[index] = convert_float4(position-convert_double4(posq[index]));\n";
// else
// compute << "posDelta[index] = position-posq[index];\n";
// }
// else
// compute << "posq[index] = " << convert << "position);\n";
// }
// else if (variable[j] == "v")
// compute << "velm[index] = " << convert << "velocity);\n";
// else {
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index] = perDofx"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+1] = perDofy"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
// }
// }
// compute << "}\n";
// numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
// numUniform += numAtoms*usesVariable(expression[j], "uniform");
// }
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// stringstream args;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// string valueName = "perDofValues"+cu.intToString(i+1);
// args << ", __global " << buffer.getType() << "* restrict " << valueName;
// }
// replacements["PARAMETER_ARGUMENTS"] = args.str();
// if (loadPosAsDelta[step])
// defines["LOAD_POS_AS_DELTA"] = "1";
// else if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
// defines.erase("LOAD_POS_AS_DELTA");
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computePerDof");
// kernels[step].push_back(kernel);
// requiredGaussian[step] = numGaussian;
// requiredUniform[step] = numUniform;
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getVelm().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getForce().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getRandom().getDevicePointer());
// index++;
// kernel.setArg<cu::Buffer>(index++, uniformRandoms->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
// kernel.setArg<cu::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
// if (stepType[step] == CustomIntegrator::ComputeSum) {
// // Create a second kernel for this step that sums the values.
//
// module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// kernel = cu.getKernel(module, "computeSum");
// kernels[step].push_back(kernel);
// index = 0;
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// bool found = false;
// for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
// if (variable[step] == integrator.getGlobalVariableName(j)) {
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// }
// for (int j = 0; j < (int) parameterNames.size() && !found; j++)
// if (variable[step] == parameterNames[j]) {
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// modifiesParameters = true;
// }
// if (!found)
// throw OpenMMException("Unknown global variable: "+variable[step]);
// kernel.setArg<cl_int>(index++, 3*numAtoms);
// }
// }
// else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
// // Compute a global value.
//
// stringstream compute;
// for (int i = step; i < numSteps && (i == step || merged[i]); i++)
// compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computeGlobal");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// index += 2;
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// }
// else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// // Apply position constraints.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// cu::Kernel kernel = cu.getKernel(module, "applyPositionDeltas");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// }
// }
//
// // Create the kernel for summing energy.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// sumEnergyKernel = cu.getKernel(module, "computeSum");
// int index = 0;
// sumEnergyKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// sumEnergyKernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// sumEnergyKernel.setArg<cl_int>(index++, 0);
// sumEnergyKernel.setArg<cl_int>(index++, cu.getEnergyBuffer().getSize());
// }
//
// // Make sure all values (variables, parameters, etc.) stored on the device are up to date.
//
// if (!deviceValuesAreCurrent) {
// perDofValues->setParameterValues(localPerDofValues);
// deviceValuesAreCurrent = true;
// }
// localValuesAreCurrent = false;
// double stepSize = integrator.getStepSize();
// if (stepSize != prevStepSize) {
// integration.getStepSize()[0].y = (cl_float) stepSize;
// integration.getStepSize().upload();
// prevStepSize = stepSize;
// }
// bool paramsChanged = false;
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i)) {
// contextParameterValues->set(i, value);
// paramsChanged = true;
// }
// }
// if (paramsChanged)
// contextParameterValues->upload();
//
// // Loop over computation steps in the integrator and execute them.
//
// for (int i = 0; i < numSteps; i++) {
// if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || context.getLastForceGroups() != forceGroup[i])) {
// // Recompute forces and/or energy. Figure out what is actually needed
// // between now and the next time they get invalidated again.
//
// bool computeForce = false, computeEnergy = false;
// for (int j = i; ; j++) {
// if (needsForces[j])
// computeForce = true;
// if (needsEnergy[j])
// computeEnergy = true;
// if (invalidatesForces[j])
// break;
// if (j == numSteps-1)
// j = -1;
// if (j == i-1)
// break;
// }
// recordChangedParameters(context);
// context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
// if (computeEnergy)
// cu.executeKernel(sumEnergyKernel, CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// forcesAreValid = true;
// }
// if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// }
// else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
// kernels[i][0].setArg<cl_float>(3, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
// kernels[i][0].setArg<cl_float>(4, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
// cu.executeKernel(kernels[i][0], 1, 1);
// }
// else if (stepType[i] == CustomIntegrator::ComputeSum) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.executeKernel(kernels[i][1], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// }
// else if (stepType[i] == CustomIntegrator::UpdateContextState) {
// recordChangedParameters(context);
// context.updateContextState();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainPositions) {
// cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.getIntegrationUtilities().computeVirtualSites();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
// cu.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
// }
// if (invalidatesForces[i])
// forcesAreValid = false;
// }
// recordChangedParameters(context);
//
// // Update the time and step count.
//
// cu.setTime(cu.getTime()+stepSize);
// cu.setStepCount(cu.getStepCount()+1);
//}
//
//void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
// if (!modifiesParameters)
// return;
// contextParameterValues->download();
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i))
// context.setParameter(parameterNames[i], contextParameterValues->get(i));
// }
//}
//
//void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
// globalValues->download();
// values.resize(numGlobalVariables);
// for (int i = 0; i < numGlobalVariables; i++)
// values[i] = globalValues->get(i);
//}
//
//void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
// for (int i = 0; i < numGlobalVariables; i++)
// globalValues->set(i, (float) values[i]);
// globalValues->upload();
//}
//
//void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// values.resize(perDofValues->getNumObjects()/3);
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// values[order[i]][j] = localPerDofValues[3*i+j][variable];
//}
//
//void CudaIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// localPerDofValues[3*i+j][variable] = (float) values[order[i]][j];
// deviceValuesAreCurrent = false;
//}
/**
 * Set the values of one per-DOF variable in the host-side copy of the per-DOF data.
 *
 * The host copy is first refreshed from perDofValues if it is stale, then the new
 * values are written into it. Nothing is uploaded here; the device copy is only
 * marked as out of date.
 *
 * @param context   the context this kernel belongs to (not used by this method)
 * @param variable  index of the per-DOF variable to modify
 * @param values    the new values; entry order[i] supplies the three components
 *                  stored at rows 3*i .. 3*i+2, where order is cu.getAtomIndex()
 */
void CudaIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
    const vector<int>& atomOrder = cu.getAtomIndex();
    const int numValues = (int) values.size();
    if (!cu.getUseDoublePrecision()) {
        // Single precision: work on the float host copy.
        if (!localValuesAreCurrent) {
            perDofValues->getParameterValues(localPerDofValuesFloat);
            localValuesAreCurrent = true;
        }
        for (int atom = 0; atom < numValues; ++atom) {
            const Vec3& source = values[atomOrder[atom]];
            for (int axis = 0; axis < 3; ++axis)
                localPerDofValuesFloat[3*atom+axis][variable] = (float) source[axis];
        }
    }
    else {
        // Double precision: work on the double host copy.
        if (!localValuesAreCurrent) {
            perDofValues->getParameterValues(localPerDofValuesDouble);
            localValuesAreCurrent = true;
        }
        for (int atom = 0; atom < numValues; ++atom) {
            const Vec3& source = values[atomOrder[atom]];
            for (int axis = 0; axis < 3; ++axis)
                localPerDofValuesDouble[3*atom+axis][variable] = source[axis];
        }
    }

    // The host copy now differs from what is stored on the device.
    deviceValuesAreCurrent = false;
}
CudaApplyAndersenThermostatKernel
::~
CudaApplyAndersenThermostatKernel
()
{
cu
.
setAsCurrent
();
...
...
platforms/cuda2/src/CudaKernels.h
View file @
bd22eada
...
...
@@ -623,50 +623,49 @@ private:
static
const
int
PmeOrder
=
5
;
};
///**
// * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
// */
//class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
//public:
// CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
// }
// ~CudaCalcCustomNonbondedForceKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param force the CustomNonbondedForce this kernel will be used for
// */
// void initialize(const System& system, const CustomNonbondedForce& force);
// /**
// * Execute the kernel to calculate the forces and/or energy.
// *
// * @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomNonbondedForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
//private:
// bool hasInitializedKernel;
// CudaContext& cu;
// CudaParameterSet* params;
// CudaArray<cl_float>* globals;
// CudaArray<mm_float4>* tabulatedFunctionParams;
// std::vector<std::string> globalParamNames;
// std::vector<cl_float> globalParamValues;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
// System& system;
//};
//
/**
 * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
 */
class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
    /**
     * Create the kernel.  The constructor only records the context and system references and
     * sets params, globals, and tabulatedFunctionParams to NULL.
     *
     * @param name      the name passed through to CalcCustomNonbondedForceKernel
     * @param platform  the Platform passed through to CalcCustomNonbondedForceKernel
     * @param cu        the CudaContext this kernel is associated with
     * @param system    the System this kernel is associated with
     */
    CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform),
            cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
    }
    ~CudaCalcCustomNonbondedForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomNonbondedForce this kernel will be used for
     */
    void initialize(const System& system, const CustomNonbondedForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomNonbondedForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
    CudaContext& cu;
    // The three pointers below start out NULL (see the constructor).
    // NOTE(review): presumably they are allocated in initialize() and released in the destructor - confirm in CudaKernels.cpp.
    CudaParameterSet* params;
    CudaArray* globals;
    CudaArray* tabulatedFunctionParams;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
    std::vector<CudaArray*> tabulatedFunctions;
    System& system;
};
///**
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// */
...
...
@@ -814,60 +813,58 @@ private:
std
::
vector
<
float
>
globalParamValues
;
};
///**
// * This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
// */
//class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
//public:
// CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform),
// hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
// donorBufferIndices(NULL), acceptorBufferIndices(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL),
// tabulatedFunctionParams(NULL), system(system) {
// }
// ~CudaCalcCustomHbondForceKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param force the CustomHbondForce this kernel will be used for
// */
// void initialize(const System& system, const CustomHbondForce& force);
// /**
// * Execute the kernel to calculate the forces and/or energy.
// *
// * @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force
// */
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
// /**
// * Copy changed parameters over to a context.
// *
// * @param context the context to copy parameters to
// * @param force the CustomHbondForce to copy the parameters from
// */
// void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
//private:
// int numDonors, numAcceptors;
// bool hasInitializedKernel;
// CudaContext& cu;
// CudaParameterSet* donorParams;
// CudaParameterSet* acceptorParams;
// CudaArray<cl_float>* globals;
// CudaArray<mm_int4>* donors;
// CudaArray<mm_int4>* acceptors;
// CudaArray<mm_int4>* donorBufferIndices;
// CudaArray<mm_int4>* acceptorBufferIndices;
// CudaArray<mm_int4>* donorExclusions;
// CudaArray<mm_int4>* acceptorExclusions;
// CudaArray<mm_float4>* tabulatedFunctionParams;
// std::vector<std::string> globalParamNames;
// std::vector<cl_float> globalParamValues;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
// System& system;
// CUfunction donorKernel, acceptorKernel;
//};
/**
 * This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
 */
class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public:
    /**
     * Create the kernel.  The constructor records the context and system references, clears
     * hasInitializedKernel, and sets every device resource pointer to NULL.  It does not assign
     * numDonors, numAcceptors, donorKernel, or acceptorKernel.
     *
     * Note that the initializer list below is not written in member declaration order (globals is
     * declared before donors and acceptors).  Members are always initialized in declaration order;
     * that is harmless here because no initializer depends on another member.
     *
     * @param name      the name passed through to CalcCustomHbondForceKernel
     * @param platform  the Platform passed through to CalcCustomHbondForceKernel
     * @param cu        the CudaContext this kernel is associated with
     * @param system    the System this kernel is associated with
     */
    CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform),
            hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
            globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), tabulatedFunctionParams(NULL), system(system) {
    }
    ~CudaCalcCustomHbondForceKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomHbondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomHbondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomHbondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
    // Not set by the constructor.
    int numDonors, numAcceptors;
    bool hasInitializedKernel;
    CudaContext& cu;
    // All of the pointers below start out NULL (see the constructor).
    CudaParameterSet* donorParams;
    CudaParameterSet* acceptorParams;
    CudaArray* globals;
    CudaArray* donors;
    CudaArray* acceptors;
    CudaArray* donorExclusions;
    CudaArray* acceptorExclusions;
    CudaArray* tabulatedFunctionParams;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
    std::vector<CudaArray*> tabulatedFunctions;
    // NOTE(review): presumably the argument lists passed when launching donorKernel and acceptorKernel - confirm in CudaKernels.cpp.
    std::vector<void*> donorArgs, acceptorArgs;
    System& system;
    // Not set by the constructor.
    CUfunction donorKernel, acceptorKernel;
};
/**
* This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
...
...
@@ -1062,94 +1059,98 @@ private:
double
prevTemp
,
prevFriction
,
prevErrorTol
;
};
///**
// * This kernel is invoked by CustomIntegrator to take one time step.
// */
//class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
//public:
// CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
// hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL),
// uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
// }
// ~CudaIntegrateCustomStepKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param integrator the CustomIntegrator this kernel will be used for
// */
// void initialize(const System& system, const CustomIntegrator& integrator);
// /**
// * Execute the kernel.
// *
// * @param context the context in which to execute this kernel
// * @param integrator the CustomIntegrator this kernel is being used for
// * @param forcesAreValid if the context has been modified since the last time step, this will be
// * false to show that cached forces are invalid and must be recalculated.
// * On exit, this should specify whether the cached forces are valid at the
// * end of the step.
// */
// void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
// /**
// * Get the values of all global variables.
// *
// * @param context the context in which to execute this kernel
// * @param values on exit, this contains the values
// */
// void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const;
// /**
// * Set the values of all global variables.
// *
// * @param context the context in which to execute this kernel
// * @param values a vector containing the values
// */
// void setGlobalVariables(ContextImpl& context, const std::vector<double>& values);
// /**
// * Get the values of a per-DOF variable.
// *
// * @param context the context in which to execute this kernel
// * @param variable the index of the variable to get
// * @param values on exit, this contains the values
// */
// void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const;
// /**
// * Set the values of a per-DOF variable.
// *
// * @param context the context in which to execute this kernel
// * @param variable the index of the variable to get
// * @param values a vector containing the values
// */
// void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
//private:
// class ReorderListener;
// std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
// std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
// void recordChangedParameters(ContextImpl& context);
// CudaContext& cu;
// double prevStepSize;
// int numGlobalVariables;
// bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
// mutable bool localValuesAreCurrent;
// CudaArray<cl_float>* globalValues;
// CudaArray<cl_float>* contextParameterValues;
// CudaArray<cl_float>* sumBuffer;
// CudaArray<cl_float>* energy;
// CudaArray<mm_float4>* uniformRandoms;
// CudaArray<mm_int4>* randomSeed;
// CudaParameterSet* perDofValues;
// mutable std::vector<std::vector<cl_float> > localPerDofValues;
// std::vector<std::vector<CUfunction> > kernels;
// CUfunction sumEnergyKernel, randomKernel;
// std::vector<CustomIntegrator::ComputationType> stepType;
// std::vector<bool> needsForces;
// std::vector<bool> needsEnergy;
// std::vector<bool> invalidatesForces;
// std::vector<bool> merged;
// std::vector<int> forceGroup;
// std::vector<int> requiredGaussian;
// std::vector<int> requiredUniform;
// std::vector<std::string> parameterNames;
//};
/**
 * This kernel is invoked by CustomIntegrator to take one time step.
 */
class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public:
    /**
     * Create the kernel.  The constructor records the context reference, clears
     * hasInitializedKernels and localValuesAreCurrent, and sets every device resource pointer to
     * NULL.  It does not assign prevStepSize, numGlobalVariables, deviceValuesAreCurrent,
     * modifiesParameters, sumEnergyKernel, or randomKernel.
     *
     * @param name      the name passed through to IntegrateCustomStepKernel
     * @param platform  the Platform passed through to IntegrateCustomStepKernel
     * @param cu        the CudaContext this kernel is associated with
     */
    CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
            hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL),
            uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
    }
    ~CudaIntegrateCustomStepKernel();
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param integrator the CustomIntegrator this kernel will be used for
     */
    void initialize(const System& system, const CustomIntegrator& integrator);
    /**
     * Execute the kernel.
     *
     * @param context    the context in which to execute this kernel
     * @param integrator the CustomIntegrator this kernel is being used for
     * @param forcesAreValid if the context has been modified since the last time step, this will be
     *                       false to show that cached forces are invalid and must be recalculated.
     *                       On exit, this should specify whether the cached forces are valid at the
     *                       end of the step.
     */
    void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
    /**
     * Get the values of all global variables.
     *
     * @param context   the context in which to execute this kernel
     * @param values    on exit, this contains the values
     */
    void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const;
    /**
     * Set the values of all global variables.
     *
     * @param context   the context in which to execute this kernel
     * @param values    a vector containing the values
     */
    void setGlobalVariables(ContextImpl& context, const std::vector<double>& values);
    /**
     * Get the values of a per-DOF variable.
     *
     * @param context   the context in which to execute this kernel
     * @param variable  the index of the variable to get
     * @param values    on exit, this contains the values
     */
    void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const;
    /**
     * Set the values of a per-DOF variable.
     *
     * @param context   the context in which to execute this kernel
     * @param variable  the index of the variable to set
     * @param values    a vector containing the values
     */
    void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
private:
    class ReorderListener;
    std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
    std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
    void recordChangedParameters(ContextImpl& context);
    CudaContext& cu;
    // Not set by the constructor.
    double prevStepSize;
    int numGlobalVariables;
    // Only hasInitializedKernels is set (to false) by the constructor; the other two flags are not.
    bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
    // Mutable so that the const getters declared above can update it.
    mutable bool localValuesAreCurrent;
    // All of the pointers below start out NULL (see the constructor).
    CudaArray* globalValues;
    CudaArray* contextParameterValues;
    CudaArray* sumBuffer;
    CudaArray* energy;
    CudaArray* uniformRandoms;
    CudaArray* randomSeed;
    CudaParameterSet* perDofValues;
    // Separate float and double copies of the per-DOF values and context values.
    // NOTE(review): presumably only the copy matching the context's precision is used - confirm in CudaKernels.cpp.
    mutable std::vector<std::vector<float> > localPerDofValuesFloat;
    mutable std::vector<std::vector<double> > localPerDofValuesDouble;
    std::vector<float> contextValuesFloat;
    std::vector<double> contextValuesDouble;
    std::vector<std::vector<CUfunction> > kernels;
    // NOTE(review): presumably kernelArgs[i][j] is the argument list for kernels[i][j] - confirm in CudaKernels.cpp.
    std::vector<std::vector<std::vector<void*> > > kernelArgs;
    // Not set by the constructor.
    CUfunction sumEnergyKernel, randomKernel;
    std::vector<CustomIntegrator::ComputationType> stepType;
    std::vector<bool> needsForces;
    std::vector<bool> needsEnergy;
    std::vector<bool> invalidatesForces;
    std::vector<bool> merged;
    std::vector<int> forceGroup;
    std::vector<int> requiredGaussian;
    std::vector<int> requiredUniform;
    std::vector<std::string> parameterNames;
};
/**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
...
...
platforms/cuda2/src/CudaParameterSet.cpp
View file @
bd22eada
...
...
@@ -39,11 +39,12 @@ using namespace std;
throw OpenMMException(m.str());\
}
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
)
:
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
,
bool
useDoublePrecision
)
:
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
int
params
=
numParameters
;
int
bufferCount
=
0
;
int
elementSize
=
4
;
elementSize
=
(
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
));
string
elementType
=
(
useDoublePrecision
?
"double"
:
"float"
);
CUdeviceptr
pointer
;
string
errorMessage
=
"Error creating parameter set "
+
name
;
if
(
!
bufferPerParameter
)
{
...
...
@@ -51,14 +52,14 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
4
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
4
,
elementSize
*
4
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
4
,
elementSize
*
4
,
pointer
));
params
-=
4
;
}
if
(
params
>
1
)
{
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
2
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
2
,
elementSize
*
2
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
2
,
elementSize
*
2
,
pointer
));
params
-=
2
;
}
}
...
...
@@ -66,50 +67,55 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
1
,
elementSize
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
1
,
elementSize
,
pointer
));
params
--
;
}
}
CudaParameterSet
::~
CudaParameterSet
()
{
string
errorMessage
=
"Error freeing device memory"
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
CHECK_RESULT
(
cuMemFree
(
buffers
[
i
].
getMemory
()));
if
(
context
.
getContextIsValid
())
{
string
errorMessage
=
"Error freeing device memory"
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
CHECK_RESULT
(
cuMemFree
(
buffers
[
i
].
getMemory
()));
}
}
void
CudaParameterSet
::
getParameterValues
(
vector
<
vector
<
float
>
>&
values
)
{
template
<
class
T
>
void
CudaParameterSet
::
getParameterValues
(
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called getParameterValues() with vector of wrong type"
);
values
.
resize
(
numObjects
);
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
values
[
i
].
resize
(
numParameters
);
int
base
=
0
;
string
errorMessage
=
"Error downloading parameter set "
+
name
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
vector
<
float4
>
data
(
numObjects
);
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
T
>
data
(
4
*
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
4
*
j
];
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
4
*
j
+
1
]
;
if
(
base
+
2
<
numParameters
)
values
[
j
][
base
+
2
]
=
data
[
j
].
z
;
values
[
j
][
base
+
2
]
=
data
[
4
*
j
+
2
]
;
if
(
base
+
3
<
numParameters
)
values
[
j
][
base
+
3
]
=
data
[
j
].
w
;
values
[
j
][
base
+
3
]
=
data
[
4
*
j
+
3
]
;
}
base
+=
4
;
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
vector
<
float2
>
data
(
numObjects
);
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
T
>
data
(
2
*
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
2
*
j
];
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
2
*
j
+
1
]
;
}
base
+=
2
;
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
vector
<
float
>
data
(
numObjects
);
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
T
>
data
(
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
values
[
j
][
base
]
=
data
[
j
];
...
...
@@ -120,36 +126,39 @@ void CudaParameterSet::getParameterValues(vector<vector<float> >& values) {
}
}
void
CudaParameterSet
::
setParameterValues
(
const
vector
<
vector
<
float
>
>&
values
)
{
template
<
class
T
>
void
CudaParameterSet
::
setParameterValues
(
const
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called setParameterValues() with vector of wrong type"
);
int
base
=
0
;
string
errorMessage
=
"Error uploading parameter set "
+
name
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
vector
<
float4
>
data
(
numObjects
);
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
T
>
data
(
4
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
4
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
4
*
j
+
1
]
=
values
[
j
][
base
+
1
];
if
(
base
+
2
<
numParameters
)
data
[
j
].
z
=
values
[
j
][
base
+
2
];
data
[
4
*
j
+
2
]
=
values
[
j
][
base
+
2
];
if
(
base
+
3
<
numParameters
)
data
[
j
].
w
=
values
[
j
][
base
+
3
];
data
[
4
*
j
+
3
]
=
values
[
j
][
base
+
3
];
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
4
;
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
vector
<
float2
>
data
(
numObjects
);
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
T
>
data
(
2
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
2
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
2
*
j
+
1
]
=
values
[
j
][
base
+
1
];
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
2
;
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
vector
<
float
>
data
(
numObjects
);
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
T
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
values
[
j
][
base
];
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
...
...
@@ -164,16 +173,26 @@ string CudaParameterSet::getParameterSuffix(int index, const std::string& extraS
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
,
".w"
};
int
buffer
=
-
1
;
for
(
int
i
=
0
;
buffer
==
-
1
&&
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
index
*
sizeof
(
float
)
<
buffers
[
i
].
getSize
())
if
(
index
*
elementSize
<
buffers
[
i
].
getSize
())
buffer
=
i
;
else
index
-=
buffers
[
i
].
getSize
()
/
sizeof
(
float
)
;
index
-=
buffers
[
i
].
getSize
()
/
elementSize
;
}
if
(
buffer
==
-
1
)
throw
OpenMMException
(
"Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("
+
name
+
")"
);
stringstream
suffix
;
suffix
<<
(
buffer
+
1
)
<<
extraSuffix
;
if
(
buffers
[
buffer
].
get
Typ
e
()
!=
"float"
)
if
(
buffers
[
buffer
].
get
Siz
e
()
!=
elementSize
)
suffix
<<
suffixes
[
index
];
return
suffix
.
str
();
}
/**
 * Define template instantiations for float and double versions of getParameterValues() and setParameterValues().
 *
 * The template definitions of these two member functions live in this source file rather than in
 * the header, so they are explicitly instantiated here for the two element types that the
 * sizeof(T) != elementSize checks in those functions can accept (float and double).
 */
namespace OpenMM {
template void CudaParameterSet::getParameterValues<float>(vector<vector<float> >& values);
template void CudaParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
template void CudaParameterSet::getParameterValues<double>(vector<vector<double> >& values);
template void CudaParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
}
\ No newline at end of file
platforms/cuda2/src/CudaParameterSet.h
View file @
bd22eada
...
...
@@ -51,8 +51,9 @@ public:
* @param name the name of the parameter set
* @param bufferPerParameter if true, a separate buffer is created for each parameter. If false,
* multiple parameters may be combined into a single buffer.
* @param useDoublePrecision whether values should be stored as single or double precision
*/
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
);
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
,
bool
useDoublePrecision
=
false
);
~
CudaParameterSet
();
/**
* Get the number of parameters.
...
...
@@ -71,13 +72,15 @@ public:
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
float
>
>&
values
);
template
<
class
T
>
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
* Set the values of all parameters.
*
* @param values values[i][j] contains the value of parameter j for object i
*/
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
float
>
>&
values
);
template
<
class
T
>
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
* Get a set of CudaNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
...
...
@@ -95,8 +98,7 @@ public:
std
::
string
getParameterSuffix
(
int
index
,
const
std
::
string
&
extraSuffix
=
""
)
const
;
private:
CudaContext
&
context
;
int
numParameters
;
int
numObjects
;
int
numParameters
,
numObjects
,
elementSize
;
std
::
string
name
;
std
::
vector
<
CudaNonbondedUtilities
::
ParameterInfo
>
buffers
;
};
...
...
platforms/cuda2/src/kernels/customHbondForce.cu
0 → 100644
View file @
bd22eada
/**
 * Drop the w component of a real4, returning its x, y, and z components as a real3.
 */
inline __device__ real3 trim(real4 v) {
    real3 xyz = make_real3(v.x, v.y, v.z);
    return xyz;
}
/**
 * This does nothing, and just exists to simplify the code generation: it returns its real3
 * argument unchanged, so trim() can be called on a value that is already a real3.
 */
inline __device__ real3 trim(real3 v) {
    return v;
}
/**
 * Subtract vec2 from vec1 component by component.  The x, y, and z components of the result hold
 * the difference, and the w component holds its squared length.
 */
inline __device__ real4 delta(real4 vec1, real4 vec2) {
    const real dx = vec1.x-vec2.x;
    const real dy = vec1.y-vec2.y;
    const real dz = vec1.z-vec2.z;
    return make_real4(dx, dy, dz, dx*dx + dy*dy + dz*dz);
}
/**
 * Subtract vec2 from vec1 component by component.  When USE_PERIODIC is defined, each component
 * of the difference is first wrapped using the periodic box dimensions.  The x, y, and z
 * components of the result hold the difference, and the w component holds its squared length.
 */
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
    real dx = vec1.x-vec2.x;
    real dy = vec1.y-vec2.y;
    real dz = vec1.z-vec2.z;
#ifdef USE_PERIODIC
    // Remove the nearest whole number of box lengths from each component.
    dx -= floor(dx*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
    dy -= floor(dy*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
    dz -= floor(dz*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
    return make_real4(dx, dy, dz, dx*dx + dy*dy + dz*dz);
}
/**
 * Return the angle between vec1 and vec2.  The w component of each argument must already hold
 * that vector's squared magnitude.
 */
inline __device__ real computeAngle(real4 vec1, real4 vec2) {
    const real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    const real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
    if (!(cosine > 0.99f || cosine < -0.99f))
        return acos(cosine);

    // We're close to the singularity in acos(), so take the cross product and use asin() instead.

    const real3 crossProduct = cross(vec1, vec2);
    const real scale = vec1.w*vec2.w;
    const real angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
    if (cosine < 0.0f)
        return M_PI-angle;
    return angle;
}
/**
 * Return the cross product of vec1 and vec2 in the x, y, and z components of the result, with
 * the squared magnitude of that cross product stored in the w component.
 */
inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
    const real3 product = cross(vec1, vec2);
    const real magnitudeSquared = product.x*product.x + product.y*product.y + product.z*product.z;
    return make_real4(product.x, product.y, product.z, magnitudeSquared);
}
/**
 * Compute forces on donors.
 *
 * Each thread processes one donor per pass of the outer loop.  For every donor, the acceptors are
 * staged through the shared posBuffer one thread block's worth at a time (three positions per
 * acceptor), and COMPUTE_DONOR_FORCE is expanded for each donor/acceptor pair that is not excluded
 * and, when USE_CUTOFF is defined, lies within the cutoff.
 *
 * @param force               fixed point force accumulators; component c of atom i is at index i+c*PADDED_NUM_ATOMS
 * @param energyBuffer        per-thread energy accumulators
 * @param posq                atom positions
 * @param exclusions          for each donor, up to four acceptor indices to skip (only read if USE_EXCLUSIONS is defined)
 * @param donorAtoms          for each donor, the indices of its atoms (-1 for an unused slot)
 * @param acceptorAtoms       for each acceptor, the indices of its atoms (-1 for an unused slot)
 * @param periodicBoxSize     the periodic box size, passed to deltaPeriodic()
 * @param invPeriodicBoxSize  the inverse of the periodic box size, passed to deltaPeriodic()
 */
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ exclusions,
        const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
    real energy = 0;
    for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x*gridDim.x) {
        // The force accumulators are per donor: they are flushed to global memory at the end of
        // every pass, so they must start from zero on every pass.  Otherwise a thread that handles
        // more than one donor would add the earlier donors' forces to the later ones.

        real3 f1 = make_real3(0);
        real3 f2 = make_real3(0);
        real3 f3 = make_real3(0);

        // Load information about the donor this thread will compute forces on.

        int donorIndex = donorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 d1, d2, d3;
        if (donorIndex < NUM_DONORS) {
            atoms = donorAtoms[donorIndex];
            d1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            d2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            d3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[donorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x) {
            // Wait until every thread in the block has finished reading the previous contents of
            // posBuffer before overwriting it.  Both loop bounds are uniform across the block, so
            // every thread reaches this barrier.

            __syncthreads();

            // Load the next block of acceptors into local memory.

            int blockSize = min((int) blockDim.x, NUM_ACCEPTORS-acceptorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = acceptorAtoms[acceptorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (donorIndex < NUM_DONORS) {
                for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
                    int acceptorIndex = acceptorStart+index;
                    if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 a1 = posBuffer[3*index];
                    real4 a2 = posBuffer[3*index+1];
                    real4 a3 = posBuffer[3*index+2];
                    real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }
        }

        // Write results

        if (donorIndex < NUM_DONORS) {
            if (atoms.x > -1) {
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0xFFFFFFFF)));
                __threadfence_block();
            }
            if (atoms.y > -1) {
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0xFFFFFFFF)));
                __threadfence_block();
            }
            if (atoms.z > -1) {
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0xFFFFFFFF)));
                __threadfence_block();
            }
        }
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
 * Compute forces on acceptors.
 *
 * Each thread handles one acceptor per pass of the outer loop and interacts it with every donor.
 * Donor positions are staged through posBuffer in tiles of blockDim.x donors, three real4 entries
 * per donor, so the launch must supply at least 3*blockDim.x*sizeof(real4) bytes of dynamic
 * shared memory.
 *
 * The per-pair work is supplied by the COMPUTE_ACCEPTOR_FORCE macro, which is defined outside this
 * file. The local names visible at its expansion point (a1..a3, d1..d3, deltaD1A1, f1..f3, index,
 * acceptorIndex, ...) are therefore kept stable.
 *
 * @param force               fixed point force accumulators: x, y and z components of atom i are at
 *                            i, i+PADDED_NUM_ATOMS and i+2*PADDED_NUM_ATOMS
 * @param energyBuffer        not touched by the code visible here
 * @param posq                atom positions (indexed by atom)
 * @param exclusions          per acceptor, up to four excluded donor indices (read only if USE_EXCLUSIONS)
 * @param donorAtoms          per donor, the indices of its atoms in x, y, z (-1 means "no atom")
 * @param acceptorAtoms       per acceptor, the indices of its atoms in x, y, z (-1 means "no atom")
 * @param periodicBoxSize     forwarded to deltaPeriodic()
 * @param invPeriodicBoxSize  forwarded to deltaPeriodic()
 */
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ exclusions,
        const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
    for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x*gridDim.x) {
        // The accumulators belong to a single acceptor, so they must start from zero on every pass.
        // Otherwise the force of an earlier acceptor would be added to a later one whenever
        // NUM_ACCEPTORS exceeds the total number of threads.

        real3 f1 = make_real3(0);
        real3 f2 = make_real3(0);
        real3 f3 = make_real3(0);

        // Load information about the acceptor this thread will compute forces on.

        int acceptorIndex = acceptorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 a1, a2, a3;
        if (acceptorIndex < NUM_ACCEPTORS) {
            atoms = acceptorAtoms[acceptorIndex];
            a1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            a2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            a3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[acceptorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x) {
            // Load the next block of donors into local memory.

            int blockSize = min((int) blockDim.x, NUM_DONORS-donorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = donorAtoms[donorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (acceptorIndex < NUM_ACCEPTORS) {
                for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
                    int donorIndex = donorStart+index;
                    if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 d1 = posBuffer[3*index];
                    real4 d2 = posBuffer[3*index+1];
                    real4 d3 = posBuffer[3*index+2];
                    real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_ACCEPTOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }

            // Every thread must be done reading this tile before any thread overwrites posBuffer
            // with the next one.  Both loops have block-uniform trip counts, so all threads of the
            // block reach this barrier.

            __syncthreads();
        }

        // Write results.  Forces are converted to 64 bit fixed point (scaled by 0xFFFFFFFF) and
        // accumulated with integer atomics.

        if (acceptorIndex < NUM_ACCEPTORS) {
            if (atoms.x > -1) {
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0xFFFFFFFF)));
                __threadfence_block();
            }
            if (atoms.y > -1) {
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0xFFFFFFFF)));
                __threadfence_block();
            }
            if (atoms.z > -1) {
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0xFFFFFFFF)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0xFFFFFFFF)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0xFFFFFFFF)));
                __threadfence_block();
            }
        }
    }
}
platforms/cuda2/src/kernels/customIntegrator.cu
0 → 100644
View file @
bd22eada
/**
 * Add up the SUM_BUFFER_SIZE entries of sumBuffer and store the total in result[SUM_OUTPUT_INDEX].
 *
 * Only threadIdx.x is used to pick elements, so every block that is launched computes the same
 * total.  The shared scratch array has WORK_GROUP_SIZE entries and is indexed by threadIdx.x.
 */
extern "C" __global__ void computeSum(const real* __restrict__ sumBuffer, real* result) {
    __shared__ real tempBuffer[WORK_GROUP_SIZE];
    const unsigned int tid = threadIdx.x;

    // Phase 1: each thread accumulates a strided slice of the input.

    real partial = 0;
    unsigned int element = tid;
    while (element < SUM_BUFFER_SIZE) {
        partial += sumBuffer[element];
        element += blockDim.x;
    }
    tempBuffer[tid] = partial;

    // Phase 2: pairwise tree reduction in shared memory.  The barrier at the top of each round
    // makes the previous round's writes (or the initial stores) visible before they are read.

    int offset = 1;
    while (offset < WORK_GROUP_SIZE) {
        __syncthreads();
        const unsigned int partner = tid+offset;
        if (tid%(offset*2) == 0 && partner < WORK_GROUP_SIZE)
            tempBuffer[tid] += tempBuffer[partner];
        offset *= 2;
    }

    // Thread 0 performed the last addition into slot 0, so it can publish it without another barrier.

    if (tid == 0)
        result[SUM_OUTPUT_INDEX] = tempBuffer[0];
}
/**
 * Add the x, y and z components of posDelta to posq for every atom, then reset posDelta to zero.
 * The w component of posq is left as it was.
 */
extern "C" __global__ void applyPositionDeltas(real4* __restrict__ posq, real4* __restrict__ posDelta) {
    const unsigned int numThreads = blockDim.x*gridDim.x;
    unsigned int atom = blockIdx.x*blockDim.x+threadIdx.x;
    while (atom < NUM_ATOMS) {
        const real4 delta = posDelta[atom];
        real4 updated = posq[atom];
        updated.x += delta.x;
        updated.y += delta.y;
        updated.z += delta.z;
        posq[atom] = updated;
        posDelta[atom] = make_real4(0, 0, 0, 0);
        atom += numThreads;
    }
}
/**
 * Fill random[i] for every atom i with three uniform random values (x, y, z; w is set to 0).
 *
 * Each thread owns one uint4 of generator state in seed[], which is read on entry and written
 * back on exit.  Every draw advances three sub-generators held in that state: a multiply-and-add
 * recurrence on state.x, a shift/xor recurrence on state.y, and an add-with-carry style
 * recurrence on state.z/state.w.  Their sum is clamped to at least 1 before scaling, so the
 * produced values are strictly positive.
 */
extern "C" __global__ void generateRandomNumbers(float4* __restrict__ random, uint4* __restrict__ seed) {
    const unsigned int threadIndex = blockIdx.x*blockDim.x+threadIdx.x;
    uint4 state = seed[threadIndex];
    unsigned int carry = 0;
    for (int atom = threadIndex; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
        // Generate three uniform random numbers.

        float values[3];
        for (int draw = 0; draw < 3; draw++) {
            state.x = state.x*69069+1;
            state.y ^= state.y << 13;
            state.y ^= state.y >> 17;
            state.y ^= state.y << 5;
            const unsigned int k = (state.z >> 2)+(state.w >> 3)+(carry >> 2);
            const unsigned int m = state.w+state.w+state.z+carry;
            state.z = state.w;
            state.w = m;
            carry = k >> 30;
            values[draw] = (float) max(state.x+state.y+state.w, 0x00000001u)/(float) 0xffffffff;
        }

        // Record the values.

        random[atom] = make_float4(values[0], values[1], values[2], 0.0f);
    }
    seed[threadIndex] = state;
}
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
0 → 100644
View file @
bd22eada
/**
 * Kernel shell for one "global" computation of a custom integrator.
 *
 * The whole body comes from the COMPUTE_STEP macro, which is defined outside this file, so what
 * is read or written depends entirely on that expansion.  The parameter names below are the
 * identifiers the expansion can refer to.
 * NOTE(review): presumably dt holds the step size, globals/params hold integrator global
 * variables and context parameters, uniform/gaussian are pre-drawn random values, and energy
 * points at the current potential energy -- confirm against the host code that launches this.
 */
extern "C" __global__ void computeGlobal(real2* __restrict__ dt, real* __restrict__ globals, real* __restrict__ params, float uniform, float gaussian, const real* __restrict__ energy) {
    COMPUTE_STEP
}
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
0 → 100644
View file @
bd22eada
/**
 * Widen a real4 to a double4, component by component.
 */
inline __device__ double4 convertToDouble4(real4 a) {
    const double x = a.x;
    const double y = a.y;
    const double z = a.z;
    const double w = a.w;
    return make_double4(x, y, z, w);
}
/**
 * Convert a double4 back to the working precision (real4), component by component.
 */
inline __device__ real4 convertFromDouble4(double4 a) {
    const real x = (real) a.x;
    const real y = (real) a.y;
    const real z = (real) a.z;
    const real w = (real) a.w;
    return make_real4(x, y, z, w);
}
/**
 * Kernel shell for one "per degree of freedom" computation of a custom integrator.
 *
 * For every atom it loads position, velocity and force in double precision and then runs the
 * COMPUTE_STEP macro (defined outside this file) for atoms whose velocity.w is non-zero.
 * Forces are stored as 64 bit fixed point and are scaled back by 1/0xFFFFFFFF here.
 * The local names position, velocity, f, mass, gaussian, uniform, stepSize, index and
 * randomIndex are in scope at the expansion point and therefore must not be renamed.
 * NOTE(review): velocity.w presumably holds the inverse mass (mass is computed as 1/velocity.w) --
 * confirm against the code that fills velm.
 */
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posDelta, real4* __restrict__ velm,
        const long long* __restrict__ force, const real2* __restrict__ dt, const real* __restrict__ globals,
        const real* __restrict__ params, real* __restrict__ sum, const float4* __restrict__ gaussianValues,
        unsigned int randomIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
        PARAMETER_ARGUMENTS) {
    real stepSize = dt[0].y;
    const double forceScale = 1.0/0xFFFFFFFF;
    const unsigned int threadStride = blockDim.x*gridDim.x;
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    randomIndex += index;
    for (; index < NUM_ATOMS; index += threadStride, randomIndex += threadStride) {
#ifdef LOAD_POS_AS_DELTA
        double4 position = convertToDouble4(posq[index]+posDelta[index]);
#else
        double4 position = convertToDouble4(posq[index]);
#endif
        double4 velocity = convertToDouble4(velm[index]);

        // Undo the fixed point scaling of the three force components.

        const double forceX = forceScale*force[index];
        const double forceY = forceScale*force[index+PADDED_NUM_ATOMS];
        const double forceZ = forceScale*force[index+PADDED_NUM_ATOMS*2];
        double4 f = make_double4(forceX, forceY, forceZ, 0.0);
        double mass = 1.0/velocity.w;
        if (velocity.w != 0.0) {
            float4 gaussian = gaussianValues[randomIndex];
            float4 uniform = uniformValues[index];
            COMPUTE_STEP
        }
    }
}
platforms/cuda2/src/kernels/customNonbonded.cu
0 → 100644
View file @
bd22eada
// Code fragment, not a complete function: it relies on isExcluded, r2, invR and dEdR being
// declared by whatever code this text is inserted into.
// NOTE(review): presumably spliced into the per-pair section of the nonbonded kernel -- confirm.
//
// The interaction is skipped for excluded pairs and, when USE_CUTOFF is defined, for pairs whose
// squared distance r2 is not below CUTOFF_SQUARED.
#ifdef USE_CUTOFF
if (!isExcluded && r2 < CUTOFF_SQUARED) {
#else
if (!isExcluded) {
#endif
    // COMPUTE_FORCE (defined elsewhere) is expected to fill in tempForce; the result is scaled
    // by invR before being accumulated into dEdR.
    real tempForce = 0;
    COMPUTE_FORCE
    dEdR += tempForce*invR;
}
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomHbondForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomHbondForce.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testHbond
()
{
CudaPlatform
platform
;
// Create a system using a CustomHbondForce.
System
customSystem
;
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"0.5*kr*(distance(d1,a1)-r0)^2 + 0.5*ktheta*(angle(a1,d1,d2)-theta0)^2 + 0.5*kpsi*(angle(d1,a1,a2)-psi0)^2 + kchi*(1+cos(n*dihedral(a3,a2,a1,d1)-chi0))"
);
custom
->
addPerDonorParameter
(
"r0"
);
custom
->
addPerDonorParameter
(
"theta0"
);
custom
->
addPerDonorParameter
(
"psi0"
);
custom
->
addPerAcceptorParameter
(
"chi0"
);
custom
->
addPerAcceptorParameter
(
"n"
);
custom
->
addGlobalParameter
(
"kr"
,
0.4
);
custom
->
addGlobalParameter
(
"ktheta"
,
0.5
);
custom
->
addGlobalParameter
(
"kpsi"
,
0.6
);
custom
->
addGlobalParameter
(
"kchi"
,
0.7
);
vector
<
double
>
parameters
(
3
);
parameters
[
0
]
=
1.5
;
parameters
[
1
]
=
1.7
;
parameters
[
2
]
=
1.9
;
custom
->
addDonor
(
1
,
0
,
-
1
,
parameters
);
parameters
.
resize
(
2
);
parameters
[
0
]
=
2.1
;
parameters
[
1
]
=
2
;
custom
->
addAcceptor
(
2
,
3
,
4
,
parameters
);
custom
->
setCutoffDistance
(
10.0
);
customSystem
.
addForce
(
custom
);
// Create an identical system using HarmonicBondForce, HarmonicAngleForce, and PeriodicTorsionForce.
System
standardSystem
;
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
HarmonicBondForce
*
bond
=
new
HarmonicBondForce
();
bond
->
addBond
(
1
,
2
,
1.5
,
0.4
);
standardSystem
.
addForce
(
bond
);
HarmonicAngleForce
*
angle
=
new
HarmonicAngleForce
();
angle
->
addAngle
(
0
,
1
,
2
,
1.7
,
0.5
);
angle
->
addAngle
(
1
,
2
,
3
,
1.9
,
0.6
);
standardSystem
.
addForce
(
angle
);
PeriodicTorsionForce
*
torsion
=
new
PeriodicTorsionForce
();
torsion
->
addTorsion
(
1
,
2
,
3
,
4
,
2
,
2.1
,
0.7
);
standardSystem
.
addForce
(
torsion
);
// Set the atoms in various positions, and verify that both systems give identical forces and energy.
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
5
);
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
c1
(
customSystem
,
integrator1
,
platform
);
Context
c2
(
standardSystem
,
integrator2
,
platform
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
positions
.
size
();
j
++
)
positions
[
j
]
=
Vec3
(
2.0
*
genrand_real2
(
sfmt
),
2.0
*
genrand_real2
(
sfmt
),
2.0
*
genrand_real2
(
sfmt
));
c1
.
setPositions
(
positions
);
c2
.
setPositions
(
positions
);
State
s1
=
c1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
s2
=
c2
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
customSystem
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
s2
.
getForces
()[
i
],
s1
.
getForces
()[
i
],
TOL
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
s1
.
getPotentialEnergy
(),
TOL
);
}
// Try changing the parameters and make sure it's still correct.
parameters
.
resize
(
3
);
parameters
[
0
]
=
1.4
;
parameters
[
1
]
=
1.7
;
parameters
[
2
]
=
1.9
;
custom
->
setDonorParameters
(
0
,
1
,
0
,
-
1
,
parameters
);
parameters
.
resize
(
2
);
parameters
[
0
]
=
2.2
;
parameters
[
1
]
=
2
;
custom
->
setAcceptorParameters
(
0
,
2
,
3
,
4
,
parameters
);
bond
->
setBondParameters
(
0
,
1
,
2
,
1.4
,
0.4
);
torsion
->
setTorsionParameters
(
0
,
1
,
2
,
3
,
4
,
2
,
2.2
,
0.7
);
custom
->
updateParametersInContext
(
c1
);
bond
->
updateParametersInContext
(
c2
);
torsion
->
updateParametersInContext
(
c2
);
State
s1
=
c1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
s2
=
c2
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
customSystem
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
s2
.
getForces
()[
i
],
s1
.
getForces
()[
i
],
TOL
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
s1
.
getPotentialEnergy
(),
TOL
);
}
void
testExclusions
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"(distance(d1,a1)-1)^2"
);
custom
->
addDonor
(
0
,
1
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addExclusion
(
1
,
0
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
2
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
2
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCutoff
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"(distance(d1,a1)-1)^2"
);
custom
->
addDonor
(
0
,
1
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
setNonbondedMethod
(
CustomHbondForce
::
CutoffNonPeriodic
);
custom
->
setCutoffDistance
(
2.5
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
3
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
2
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
2
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCustomFunctions
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"foo(distance(d1,a1))"
);
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
0
,
1
,
-
1
,
vector
<
double
>
());
vector
<
double
>
function
(
2
);
function
[
0
]
=
0
;
function
[
1
]
=
1
;
custom
->
addFunction
(
"foo"
,
function
,
0
,
10
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0.1
,
0.1
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
0.1
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.1
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
0.1
*
2
+
0.1
*
2
,
state
.
getPotentialEnergy
(),
TOL
);
}
/**
 * Run every CustomHbondForce test.  Returns 0 and prints "Done" when all pass; prints the
 * exception message and returns 1 as soon as one of them throws.
 */
int main() {
    typedef void (*TestFunction)();
    const TestFunction tests[] = {testHbond, testExclusions, testCutoff, testCustomFunctions};
    const int numTests = sizeof(tests)/sizeof(tests[0]);
    try {
        for (int t = 0; t < numTests; t++)
            tests[t]();
    }
    catch (const exception& e) {
        cout << "exception: " << e.what() << endl;
        return 1;
    }
    cout << "Done" << endl;
    return 0;
}
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/CustomIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
/**
 * Test a simple leapfrog integrator on a single bond.
 *
 * Two equal masses joined by a harmonic bond form a harmonic oscillator, so positions,
 * velocities and total energy are compared with the analytical solution at every step.
 */
void testSingleBond() {
    CudaPlatform platform;
    System system;
    system.addParticle(2.0);
    system.addParticle(2.0);
    CustomIntegrator integrator(0.01);
    integrator.addComputePerDof("v", "v+dt*f/m");
    integrator.addComputePerDof("x", "x+dt*v");
    HarmonicBondForce* forceField = new HarmonicBondForce();
    forceField->addBond(0, 1, 1.5, 1);
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions;
    positions.push_back(Vec3(-1, 0, 0));
    positions.push_back(Vec3(1, 0, 0));
    context.setPositions(positions);

    // This is simply a harmonic oscillator, so compare it to the analytical solution.

    const double freq = 1.0;
    const double looseTol = 0.02;
    State state = context.getState(State::Energy);
    const double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
    for (int step = 0; step < 1000; ++step) {
        state = context.getState(State::Positions | State::Velocities | State::Energy);
        const double time = state.getTime();

        // Each particle carries half of the bond length and half of its rate of change.

        const double expectedDist = 1.5+0.5*std::cos(freq*time);
        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], looseTol);
        ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], looseTol);
        const double expectedSpeed = -0.5*freq*std::sin(freq*time);
        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], looseTol);
        ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], looseTol);
        const double energy = state.getKineticEnergy()+state.getPotentialEnergy();
        ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
        integrator.step(1);
    }
}
/**
* Test an integrator that enforces constraints.
*/
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
double
temp
=
500.0
;
CudaPlatform
platform
;
System
system
;
CustomIntegrator
integrator
(
0.002
);
integrator
.
addPerDofVariable
(
"oldx"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"oldx"
,
"x"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addConstrainPositions
();
integrator
.
addComputePerDof
(
"v"
,
"(x-oldx)/dt"
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
i
%
2
==
0
?
5.0
:
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
for
(
int
i
=
0
;
i
<
numParticles
-
1
;
++
i
)
system
.
addConstraint
(
i
,
i
+
1
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Energy
);
for
(
int
j
=
0
;
j
<
system
.
getNumConstraints
();
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2e-5
);
}
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
if
(
i
==
1
)
initialEnergy
=
energy
;
else
if
(
i
>
1
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
/**
 * Test an integrator that applies constraints directly to velocities.
 *
 * Ten particles are split into three constrained groups.  After every pair of steps the test
 * checks the constraint lengths, checks (from the second iteration on) that the relative velocity
 * along each constraint vanishes, and checks that the total energy stays close to the first
 * recorded value.
 */
void testVelocityConstraints() {
    const int numParticles = 10;
    CudaPlatform platform;
    System system;
    CustomIntegrator integrator(0.002);
    integrator.addPerDofVariable("x1", 0);
    integrator.addComputePerDof("v", "v+0.5*dt*f/m");
    integrator.addComputePerDof("x", "x+dt*v");
    integrator.addComputePerDof("x1", "x");
    integrator.addConstrainPositions();
    integrator.addComputePerDof("v", "v+0.5*dt*f/m+(x-x1)/dt");
    integrator.addConstrainVelocities();
    integrator.setConstraintTolerance(1e-5);
    NonbondedForce* forceField = new NonbondedForce();
    for (int p = 0; p < numParticles; ++p) {
        const bool isEven = (p%2 == 0);
        system.addParticle(isEven ? 5.0 : 10.0);
        forceField->addParticle((isEven ? 0.2 : -0.2), 0.5, 5.0);
    }

    // Constrain the first three particles with SHAKE.

    system.addConstraint(0, 1, 1.0);
    system.addConstraint(1, 2, 1.0);

    // Constrain the next three with SETTLE.

    system.addConstraint(3, 4, 1.0);
    system.addConstraint(5, 4, 1.0);
    system.addConstraint(3, 5, sqrt(2.0));

    // Constrain the rest with CCMA.

    for (int p = 6; p < numParticles-1; ++p)
        system.addConstraint(p, p+1, 1.0);
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions(numParticles);
    vector<Vec3> velocities(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    for (int p = 0; p < numParticles; ++p) {
        positions[p] = Vec3(p/2, (p+1)/2, 0);
        velocities[p] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
    }
    context.setPositions(positions);
    context.setVelocities(velocities);

    // Simulate it and see whether the constraints remain satisfied.

    double initialEnergy = 0.0;
    for (int iteration = 0; iteration < 1000; ++iteration) {
        integrator.step(2);
        State state = context.getState(State::Positions | State::Velocities | State::Energy);
        const int numConstraints = system.getNumConstraints();
        for (int c = 0; c < numConstraints; ++c) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(c, particle1, particle2, distance);
            Vec3 p1 = state.getPositions()[particle1];
            Vec3 p2 = state.getPositions()[particle2];
            const double dx = p1[0]-p2[0];
            const double dy = p1[1]-p2[1];
            const double dz = p1[2]-p2[2];
            double dist = std::sqrt(dx*dx+dy*dy+dz*dz);
            ASSERT_EQUAL_TOL(distance, dist, 2e-5);
            if (iteration > 0) {
                Vec3 v1 = state.getVelocities()[particle1];
                Vec3 v2 = state.getVelocities()[particle2];
                double vel = (v1-v2).dot(p1-p2);
                ASSERT_EQUAL_TOL(0.0, vel, 2e-5);
            }
        }
        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
        if (iteration == 0) {
            initialEnergy = energy;
        }
        else {
            ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);
        }
    }
}
/**
 * Test an integrator with an AndersenThermostat to see if updateContextState()
 * is being handled correctly.
 *
 * After an equilibration run, the kinetic energy averaged over numSteps steps is compared with
 * 0.5*numParticles*3*BOLTZ*temp.
 */
void testWithThermostat() {
    const int numParticles = 8;
    const double temp = 100.0;
    const double collisionFreq = 10.0;
    const int numSteps = 10000;
    CudaPlatform platform;
    System system;
    CustomIntegrator integrator(0.005);
    integrator.addUpdateContextState();
    integrator.addComputePerDof("v", "v+dt*f/m");
    integrator.addComputePerDof("x", "x+dt*v");
    NonbondedForce* forceField = new NonbondedForce();
    vector<Vec3> positions(numParticles);
    for (int p = 0; p < numParticles; ++p) {
        system.addParticle(2.0);
        forceField->addParticle((p%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);

        // Place the particles on the corners of a cube with alternating charges.

        positions[p] = Vec3((p%2 == 0 ? 2 : -2), (p%4 < 2 ? 2 : -2), (p < 4 ? 2 : -2));
    }
    system.addForce(forceField);
    AndersenThermostat* thermostat = new AndersenThermostat(temp, collisionFreq);
    system.addForce(thermostat);
    Context context(system, integrator, platform);
    context.setPositions(positions);

    // Let it equilibrate.

    integrator.step(10000);

    // Now run it for a while and see if the temperature is correct.

    double kineticEnergySum = 0.0;
    for (int step = 0; step < numSteps; ++step) {
        State state = context.getState(State::Energy);
        kineticEnergySum += state.getKineticEnergy();
        integrator.step(1);
    }
    const double ke = kineticEnergySum/numSteps;
    const double expected = 0.5*numParticles*3*BOLTZ*temp;
    ASSERT_USUALLY_EQUAL_TOL(expected, ke, 6/std::sqrt((double) numSteps));
}
/**
 * Test a Monte Carlo integrator that uses global variables and depends on energy.
 *
 * Each step proposes a Gaussian displacement and accepts or rejects it with a Metropolis
 * criterion.  The histogram of bond lengths sampled this way is compared with the expected
 * distribution dist^2*exp(-5*(dist-2)^2/kT), normalized over the histogram bins.
 */
void testMonteCarlo() {
    CudaPlatform platform;
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    CustomIntegrator integrator(0.1);
    const double kT = BOLTZ*300.0;
    integrator.addGlobalVariable("kT", kT);
    integrator.addGlobalVariable("oldE", 0);
    integrator.addGlobalVariable("accept", 0);
    integrator.addPerDofVariable("oldx", 0);
    integrator.addComputeGlobal("oldE", "energy");
    integrator.addComputePerDof("oldx", "x");
    integrator.addComputePerDof("x", "x+dt*gaussian");
    integrator.addComputeGlobal("accept", "step(exp((oldE-energy)/kT)-uniform)");
    integrator.addComputePerDof("x", "accept*x + (1-accept)*oldx");
    HarmonicBondForce* forceField = new HarmonicBondForce();
    forceField->addBond(0, 1, 2.0, 10.0);
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions;
    positions.push_back(Vec3(-1, 0, 0));
    positions.push_back(Vec3(1, 0, 0));
    context.setPositions(positions);

    // Compute the histogram of distances and see if it satisfies a Boltzmann distribution.

    const int numBins = 100;
    const double maxDist = 4.0;
    const int numIterations = 5000;
    vector<int> counts(numBins, 0);
    for (int iteration = 0; iteration < numIterations; ++iteration) {
        integrator.step(10);
        State state = context.getState(State::Positions);
        Vec3 delta = state.getPositions()[0]-state.getPositions()[1];
        double dist = sqrt(delta.dot(delta));
        if (dist >= maxDist)
            continue; // Outside the histogram range: not counted.
        const int bin = (int) (numBins*dist/maxDist);
        counts[bin]++;
    }

    // Evaluate the unnormalized expected density at the center of each bin.

    vector<double> expected(numBins, 0);
    double sum = 0;
    for (int bin = 0; bin < numBins; bin++) {
        double dist = (bin+0.5)*maxDist/numBins;
        expected[bin] = dist*dist*exp(-5.0*(dist-2)*(dist-2)/kT);
        sum += expected[bin];
    }
    for (int bin = 0; bin < numBins; bin++) {
        ASSERT_USUALLY_EQUAL_TOL((double) counts[bin]/numIterations, expected[bin]/sum, 0.01);
    }
}
/**
 * Test the ComputeSum operation.
 *
 * The integrator sums "m*v*v/2" over all degrees of freedom into the global variable "ke"
 * (index 0).  Before every step that value is compared with the kinetic energy reported by the
 * Context.
 */
void testSum() {
    const int numParticles = 200;
    const double boxSize = 10;
    CudaPlatform platform;
    System system;
    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
    NonbondedForce* nb = new NonbondedForce();
    system.addForce(nb);
    vector<Vec3> positions(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    for (int i = 0; i < numParticles; i++) {
        system.addParticle(1.5);
        nb->addParticle(i%2 == 0 ? 1 : -1, 0.1, 1);

        // Rejection sampling: redraw the position until it is not too close (squared distance
        // below 0.1) to any particle placed so far.

        bool close = true;
        while (close) {
            positions[i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
            close = false;
            for (int j = 0; j < i; ++j) {
                Vec3 delta = positions[i]-positions[j];
                if (delta.dot(delta) < 0.1)
                    close = true;
            }
        }
    }
    CustomIntegrator integrator(0.01);
    integrator.addGlobalVariable("ke", 0);
    integrator.addComputePerDof("v", "v+dt*f/m");
    integrator.addComputePerDof("x", "x+dt*v");
    integrator.addComputeSum("ke", "m*v*v/2");
    Context context(system, integrator, platform);
    context.setPositions(positions);

    // See if the sum is being computed correctly.

    for (int i = 0; i < 100; ++i) {
        State state = context.getState(State::Energy);
        ASSERT_EQUAL_TOL(state.getKineticEnergy(), integrator.getGlobalVariable(0), 1e-5);
        integrator.step(1);
    }
}
/**
 * Test an integrator that both uses and modifies a context parameter.
 *
 * Each step copies the "AndersenTemperature" context parameter into the global
 * variable "temp" and then writes temp*2 back to the parameter, so the parameter
 * (initially 0.1) is expected to double on every step.
 */
void testParameter() {
    const int totalSteps = 10;
    CudaPlatform platform;
    System system;
    system.addParticle(1.0);
    AndersenThermostat* thermostat = new AndersenThermostat(0.1, 0.1);
    system.addForce(thermostat);
    CustomIntegrator integrator(0.1);
    integrator.addGlobalVariable("temp", 0);
    integrator.addComputeGlobal("temp", "AndersenTemperature");
    integrator.addComputeGlobal("AndersenTemperature", "temp*2");
    Context context(system, integrator, platform);

    // See if the parameter is being used correctly: after n steps it should be 0.1*2^n.

    for (int stepsTaken = 1; stepsTaken <= totalSteps; stepsTaken++) {
        integrator.step(1);
        ASSERT_EQUAL_TOL(context.getParameter("AndersenTemperature"), 0.1*(1<<stepsTaken), 1e-5);
    }
}
/**
* Test random number distributions.
*/
void
testRandomDistributions
()
{
const
int
numParticles
=
100
;
const
int
numBins
=
20
;
const
int
numSteps
=
100
;
CudaPlatform
platform
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addPerDofVariable
(
"a"
,
0
);
integrator
.
addPerDofVariable
(
"b"
,
0
);
integrator
.
addComputePerDof
(
"a"
,
"uniform"
);
integrator
.
addComputePerDof
(
"b"
,
"gaussian"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the random numbers are distributed correctly.
vector
<
int
>
bins
(
numBins
);
double
mean
=
0.0
;
double
var
=
0.0
;
double
skew
=
0.0
;
double
kurtosis
=
0.0
;
vector
<
Vec3
>
values
;
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integrator
.
step
(
1
);
integrator
.
getPerDofVariable
(
0
,
values
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v
=
values
[
i
][
j
];
ASSERT
(
v
>=
0
&&
v
<
1
);
bins
[(
int
)
(
v
*
numBins
)]
++
;
}
integrator
.
getPerDofVariable
(
1
,
values
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v
=
values
[
i
][
j
];
mean
+=
v
;
var
+=
v
*
v
;
skew
+=
v
*
v
*
v
;
kurtosis
+=
v
*
v
*
v
*
v
;
}
}
// Check the distribution of uniform randoms.
int
numValues
=
numParticles
*
numSteps
*
3
;
double
expected
=
numValues
/
(
double
)
numBins
;
double
tol
=
4
*
sqrt
(
expected
);
for
(
int
i
=
0
;
i
<
numBins
;
i
++
)
ASSERT
(
bins
[
i
]
>=
expected
-
tol
&&
bins
[
i
]
<=
expected
+
tol
);
// Check the distribution of gaussian randoms.
mean
/=
numValues
;
var
/=
numValues
;
skew
/=
numValues
;
kurtosis
/=
numValues
;
double
c2
=
var
-
mean
*
mean
;
double
c3
=
skew
-
3
*
var
*
mean
+
2
*
mean
*
mean
*
mean
;
double
c4
=
kurtosis
-
4
*
skew
*
mean
-
3
*
var
*
var
+
12
*
var
*
mean
*
mean
-
6
*
mean
*
mean
*
mean
*
mean
;
ASSERT_EQUAL_TOL
(
0.0
,
mean
,
3.0
/
sqrt
((
double
)
numValues
));
ASSERT_EQUAL_TOL
(
1.0
,
c2
,
3.0
/
pow
(
numValues
,
1.0
/
3.0
));
ASSERT_EQUAL_TOL
(
0.0
,
c3
,
3.0
/
pow
(
numValues
,
1.0
/
4.0
));
ASSERT_EQUAL_TOL
(
0.0
,
c4
,
3.0
/
pow
(
numValues
,
1.0
/
4.0
));
}
/**
* Test getting and setting per-DOF variables.
*/
void
testPerDofVariables
()
{
const
int
numParticles
=
200
;
const
double
boxSize
=
10
;
CudaPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
NonbondedForce
*
nb
=
new
NonbondedForce
();
system
.
addForce
(
nb
);
nb
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.5
);
nb
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
0.1
,
1
);
bool
close
=
true
;
while
(
close
)
{
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
close
=
false
;
for
(
int
j
=
0
;
j
<
i
;
++
j
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
delta
.
dot
(
delta
)
<
0.1
)
close
=
true
;
}
}
}
CustomIntegrator
integrator
(
0.01
);
integrator
.
addPerDofVariable
(
"temp"
,
0
);
integrator
.
addPerDofVariable
(
"pos"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addComputePerDof
(
"pos"
,
"x"
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
vector
<
Vec3
>
initialValues
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
initialValues
[
i
]
=
Vec3
(
i
+
0.1
,
i
+
0.2
,
i
+
0.3
);
integrator
.
setPerDofVariable
(
0
,
initialValues
);
// Run a simulation, then query per-DOF values and see if they are correct.
vector
<
Vec3
>
values
;
context
.
getState
(
State
::
Forces
);
// Cause atom reordering to happen before the first step
for
(
int
i
=
0
;
i
<
200
;
++
i
)
{
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
);
integrator
.
getPerDofVariable
(
0
,
values
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
ASSERT_EQUAL_VEC
(
initialValues
[
j
],
values
[
j
],
1e-5
);
integrator
.
getPerDofVariable
(
1
,
values
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
ASSERT_EQUAL_VEC
(
state
.
getPositions
()[
j
],
values
[
j
],
1e-5
);
}
}
/**
* Test evaluating force groups separately.
*/
void
testForceGroups
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
CustomIntegrator
integrator
(
0.01
);
integrator
.
addPerDofVariable
(
"outf"
,
0
);
integrator
.
addPerDofVariable
(
"outf1"
,
0
);
integrator
.
addPerDofVariable
(
"outf2"
,
0
);
integrator
.
addGlobalVariable
(
"oute"
,
0
);
integrator
.
addGlobalVariable
(
"oute1"
,
0
);
integrator
.
addGlobalVariable
(
"oute2"
,
0
);
integrator
.
addComputePerDof
(
"outf"
,
"f"
);
integrator
.
addComputePerDof
(
"outf1"
,
"f1"
);
integrator
.
addComputePerDof
(
"outf2"
,
"f2"
);
integrator
.
addComputeGlobal
(
"oute"
,
"energy"
);
integrator
.
addComputeGlobal
(
"oute1"
,
"energy1"
);
integrator
.
addComputeGlobal
(
"oute2"
,
"energy2"
);
HarmonicBondForce
*
bonds
=
new
HarmonicBondForce
();
bonds
->
addBond
(
0
,
1
,
1.5
,
1.1
);
bonds
->
setForceGroup
(
1
);
system
.
addForce
(
bonds
);
NonbondedForce
*
nb
=
new
NonbondedForce
();
nb
->
addParticle
(
0.2
,
1
,
0
);
nb
->
addParticle
(
0.2
,
1
,
0
);
nb
->
setForceGroup
(
2
);
system
.
addForce
(
nb
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// See if the various forces are computed correctly.
integrator
.
step
(
1
);
vector
<
Vec3
>
f
,
f1
,
f2
;
double
e1
=
0.5
*
1.1
*
0.5
*
0.5
;
double
e2
=
138.935456
*
0.2
*
0.2
/
2.0
;
integrator
.
getPerDofVariable
(
0
,
f
);
integrator
.
getPerDofVariable
(
1
,
f1
);
integrator
.
getPerDofVariable
(
2
,
f2
);
ASSERT_EQUAL_VEC
(
Vec3
(
1.1
*
0.5
,
0
,
0
),
f1
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
1.1
*
0.5
,
0
,
0
),
f1
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
138.935456
*
0.2
*
0.2
/
4.0
,
0
,
0
),
f2
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
138.935456
*
0.2
*
0.2
/
4.0
,
0
,
0
),
f2
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
0
]
+
f2
[
0
],
f
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
1
]
+
f2
[
1
],
f
[
1
],
1e-5
);
ASSERT_EQUAL_TOL
(
e1
,
integrator
.
getGlobalVariable
(
1
),
1e-5
);
ASSERT_EQUAL_TOL
(
e2
,
integrator
.
getGlobalVariable
(
2
),
1e-5
);
ASSERT_EQUAL_TOL
(
e1
+
e2
,
integrator
.
getGlobalVariable
(
0
),
1e-5
);
// Make sure they also match the values returned by the Context.
State
s
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
);
State
s1
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
,
2
);
State
s2
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
,
4
);
vector
<
Vec3
>
c
,
c1
,
c2
;
c
=
context
.
getState
(
State
::
Forces
,
false
).
getForces
();
c1
=
context
.
getState
(
State
::
Forces
,
false
,
2
).
getForces
();
c2
=
context
.
getState
(
State
::
Forces
,
false
,
4
).
getForces
();
ASSERT_EQUAL_VEC
(
f
[
0
],
c
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f
[
1
],
c
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
0
],
c1
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
1
],
c1
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f2
[
0
],
c2
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f2
[
1
],
c2
[
1
],
1e-5
);
ASSERT_EQUAL_TOL
(
s
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
0
),
1e-5
);
ASSERT_EQUAL_TOL
(
s1
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
1
),
1e-5
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
2
),
1e-5
);
}
/**
 * Test a multiple time step r-RESPA integrator.
 *
 * Forces in group 1 are applied as half-kicks over the full step dt, while
 * forces in group 0 are integrated in two inner velocity-Verlet substeps of
 * dt/2.  The NonbondedForce is assigned to group 1 with its reciprocal space
 * part in group 0; the bonds keep their default group.  Total energy is
 * recorded on the second loop iteration and must be conserved (tolerance 0.05)
 * on every later one.
 */
void testRespa() {
    const int numParticles = 8;
    const int numInnerSteps = 2;
    CudaPlatform platform;
    System system;
    system.setDefaultPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));

    // Outer half-kick, inner substeps, outer half-kick.

    CustomIntegrator integrator(0.002);
    integrator.addComputePerDof("v", "v+0.5*dt*f1/m");
    for (int inner = 0; inner < numInnerSteps; inner++) {
        integrator.addComputePerDof("v", "v+0.5*(dt/2)*f0/m");
        integrator.addComputePerDof("x", "x+(dt/2)*v");
        integrator.addComputePerDof("v", "v+0.5*(dt/2)*f0/m");
    }
    integrator.addComputePerDof("v", "v+0.5*dt*f1/m");

    // Bonds between consecutive particles.

    HarmonicBondForce* bonds = new HarmonicBondForce();
    for (int first = 0; first < numParticles-2; first++)
        bonds->addBond(first, first+1, 1.0, 0.5);
    system.addForce(bonds);

    // Ewald nonbonded interactions with alternating masses and charges.

    NonbondedForce* nb = new NonbondedForce();
    nb->setCutoffDistance(2.0);
    nb->setNonbondedMethod(NonbondedForce::Ewald);
    for (int p = 0; p < numParticles; ++p) {
        const bool isEven = (p%2 == 0);
        system.addParticle(isEven ? 5.0 : 10.0);
        nb->addParticle((isEven ? 0.2 : -0.2), 0.5, 5.0);
    }
    nb->setForceGroup(1);
    nb->setReciprocalSpaceForceGroup(0);
    system.addForce(nb);
    Context context(system, integrator, platform);

    // Start from fixed integer coordinates (note the integer divisions) with
    // random velocities in [-0.5, 0.5).

    vector<Vec3> positions(numParticles);
    vector<Vec3> velocities(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    for (int p = 0; p < numParticles; ++p) {
        positions[p] = Vec3(p/2, (p+1)/2, 0);
        velocities[p] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
    }
    context.setPositions(positions);
    context.setVelocities(velocities);

    // Simulate it and monitor energy conservation.

    double referenceEnergy = 0.0;
    for (int iteration = 0; iteration < 1000; ++iteration) {
        State state = context.getState(State::Energy);
        const double totalEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
        if (iteration > 1)
            ASSERT_EQUAL_TOL(referenceEnergy, totalEnergy, 0.05)
        else if (iteration == 1)
            referenceEnergy = totalEnergy;
        integrator.step(2);
    }
}
/**
 * Run every CustomIntegrator test in order.  If a test throws, print the
 * exception's message and return 1; otherwise print "Done" and return 0.
 */
int main() {
    typedef void (*TestCase)();
    const TestCase testCases[] = {
        testSingleBond,
        testConstraints,
        testVelocityConstraints,
        testWithThermostat,
        testMonteCarlo,
        testSum,
        testParameter,
        testRandomDistributions,
        testPerDofVariables,
        testForceGroups,
        testRespa
    };
    const int numTestCases = sizeof(testCases)/sizeof(testCases[0]);
    try {
        for (int index = 0; index < numTestCases; ++index)
            testCases[index]();
    }
    catch (const exception& e) {
        cout << "exception: " << e.what() << endl;
        return 1;
    }
    cout << "Done" << endl;
    return 0;
}
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests all the different force terms in the CUDA implementation of CustomNonbondedForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "sfmt/SFMT.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
// Tolerance passed to the ASSERT_EQUAL_VEC / ASSERT_EQUAL_TOL checks in this file
// wherever a test does not supply its own literal tolerance.
const
double
TOL
=
1e-5
;
void
testSimpleExpression
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"-0.1*r^3"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
force
=
0.1
*
3
*
(
2
*
2
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
-
0.1
*
(
2
*
2
*
2
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testParameters
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"scale*a*(r*b)^3; a=a1*a2; b=c+b1+b2"
);
forceField
->
addPerParticleParameter
(
"a"
);
forceField
->
addPerParticleParameter
(
"b"
);
forceField
->
addGlobalParameter
(
"scale"
,
3.0
);
forceField
->
addGlobalParameter
(
"c"
,
-
1.0
);
vector
<
double
>
params
(
2
);
params
[
0
]
=
1.5
;
params
[
1
]
=
2.0
;
forceField
->
addParticle
(
params
);
params
[
0
]
=
2.0
;
params
[
1
]
=
3.0
;
forceField
->
addParticle
(
params
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
context
.
setParameter
(
"scale"
,
1.0
);
context
.
setParameter
(
"c"
,
0.0
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
vector
<
Vec3
>
forces
=
state
.
getForces
();
double
force
=
-
3.0
*
3
*
5.0
*
(
10
*
10
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
3.0
*
(
10
*
10
*
10
),
state
.
getPotentialEnergy
(),
TOL
);
// Try changing the global parameters and make sure it's still correct.
context
.
setParameter
(
"scale"
,
1.5
);
context
.
setParameter
(
"c"
,
1.0
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
force
=
-
1.5
*
3.0
*
3
*
6.0
*
(
12
*
12
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
1.5
*
3.0
*
(
12
*
12
*
12
),
state
.
getPotentialEnergy
(),
TOL
);
// Try changing the per-particle parameters and make sure it's still correct.
params
[
0
]
=
1.6
;
params
[
1
]
=
2.1
;
forceField
->
setParticleParameters
(
0
,
params
);
params
[
0
]
=
1.9
;
params
[
1
]
=
2.8
;
forceField
->
setParticleParameters
(
1
,
params
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
force
=
-
1.5
*
1.6
*
1.9
*
3
*
5.9
*
(
11.8
*
11.8
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
1.5
*
1.6
*
1.9
*
(
11.8
*
11.8
*
11.8
),
state
.
getPotentialEnergy
(),
TOL
);
}
/**
 * Test a force that uses five per-particle parameters at once.  The energy is
 * (a1*a2+b1*b2+c1*c2+d1*d2+e1*e2)*r, evaluated for two particles 2 apart.
 */
void testManyParameters() {
    const int numParameters = 5;
    const char* parameterNames[numParameters] = {"a", "b", "c", "d", "e"};
    const double firstValues[numParameters] = {1.0, 2.0, 3.0, 4.0, 5.0};
    const double secondValues[numParameters] = {1.1, 1.2, 1.3, 1.4, 1.5};
    CudaPlatform platform;
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    VerletIntegrator integrator(0.01);
    CustomNonbondedForce* forceField = new CustomNonbondedForce("(a1*a2+b1*b2+c1*c2+d1*d2+e1*e2)*r");
    for (int k = 0; k < numParameters; k++)
        forceField->addPerParticleParameter(parameterNames[k]);
    forceField->addParticle(vector<double>(firstValues, firstValues+numParameters));
    forceField->addParticle(vector<double>(secondValues, secondValues+numParameters));
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(2, 0, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    vector<Vec3> forces = state.getForces();

    // The energy is linear in r, so the force magnitude is the coefficient
    // itself and the energy at r=2 is twice that.

    const double coefficient = 1*1.1+2*1.2+3*1.3+4*1.4+5*1.5;
    ASSERT_EQUAL_VEC(Vec3(coefficient, 0, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(-coefficient, 0, 0), forces[1], TOL);
    ASSERT_EQUAL_TOL(2*coefficient, state.getPotentialEnergy(), TOL);
}
/**
 * Test that excluded pairs are skipped.  Four particles sit at x = 0, 1, 2, 3
 * with a = 1, 2, 3, 4, and every pair except (0, 3) is excluded, so only that
 * pair contributes to the forces and energy.
 */
void testExclusions() {
    const int numParticles = 4;
    const int numExclusions = 5;
    const int excludedPairs[numExclusions][2] = {{0, 1}, {1, 2}, {2, 3}, {0, 2}, {1, 3}};
    CudaPlatform platform;
    System system;
    VerletIntegrator integrator(0.01);
    CustomNonbondedForce* nonbonded = new CustomNonbondedForce("a*r; a=a1+a2");
    nonbonded->addPerParticleParameter("a");
    vector<double> params(1);
    vector<Vec3> positions(numParticles);
    for (int p = 0; p < numParticles; p++) {
        system.addParticle(1.0);
        params[0] = p+1;
        nonbonded->addParticle(params);
        positions[p] = Vec3(p, 0, 0);
    }
    for (int e = 0; e < numExclusions; e++)
        nonbonded->addExclusion(excludedPairs[e][0], excludedPairs[e][1]);
    system.addForce(nonbonded);
    Context context(system, integrator, platform);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();

    // Only particles 0 and 3 interact: a = 1+4 at a distance of 3.

    ASSERT_EQUAL_VEC(Vec3(1+4, 0, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
    ASSERT_EQUAL_VEC(Vec3(-(1+4), 0, 0), forces[3], TOL);
    ASSERT_EQUAL_TOL((1+4)*3.0, state.getPotentialEnergy(), TOL);
}
/**
 * Test a non-periodic cutoff.  Three particles lie on the y axis at 0, 2 and 3
 * with energy "r" and a cutoff of 2.5, so the pair at distance 3 is expected
 * to contribute nothing.
 */
void testCutoff() {
    const int numParticles = 3;
    CudaPlatform platform;
    System system;
    VerletIntegrator integrator(0.01);
    CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
    for (int p = 0; p < numParticles; p++)
        system.addParticle(1.0);
    for (int p = 0; p < numParticles; p++)
        forceField->addParticle(vector<double>());
    forceField->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
    forceField->setCutoffDistance(2.5);
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions(numParticles);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 2, 0);
    positions[2] = Vec3(0, 3, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();

    // Pairs (0,1) and (1,2) are within the cutoff; the two unit forces on the
    // middle particle cancel.

    ASSERT_EQUAL_VEC(Vec3(0, 1, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[1], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, -1, 0), forces[2], TOL);
    ASSERT_EQUAL_TOL(2.0+1.0, state.getPotentialEnergy(), TOL);
}
/**
 * Test a periodic cutoff.  Three particles lie on the y axis at 0, 2.1 and 3
 * in a cubic box of side 4 with energy "r" and a cutoff of 2.0.  The expected
 * energy, 1.9+1+0.9, corresponds to the pair distances measured through the
 * nearest periodic image.
 */
void testPeriodic() {
    const int numParticles = 3;
    CudaPlatform platform;
    System system;
    VerletIntegrator integrator(0.01);
    CustomNonbondedForce* forceField = new CustomNonbondedForce("r");
    for (int p = 0; p < numParticles; p++)
        system.addParticle(1.0);
    for (int p = 0; p < numParticles; p++)
        forceField->addParticle(vector<double>());
    forceField->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
    forceField->setCutoffDistance(2.0);
    system.setDefaultPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions(numParticles);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 2.1, 0);
    positions[2] = Vec3(0, 3, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();

    // Particle 0 is pulled in -y by both neighbours, particle 1 in +y by both,
    // and the two pulls on particle 2 cancel.

    ASSERT_EQUAL_VEC(Vec3(0, -2, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 2, 0), forces[1], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
    ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
}
/**
 * Test an energy expression that calls a tabulated function.
 *
 * The table holds 21 samples of sin(0.25*i) and is registered as "fn" over
 * the range [1, 6], so within that range fn(r) should approximate sin(r-1)
 * and the energy "fn(r)+1" should approximate sin(r-1)+1.  The assertions
 * expect fn to contribute zero energy and force outside the range.
 */
void testTabulatedFunction() {
    CudaPlatform platform;
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    VerletIntegrator integrator(0.01);
    CustomNonbondedForce* forceField = new CustomNonbondedForce("fn(r)+1");
    forceField->addParticle(vector<double>());
    forceField->addParticle(vector<double>());
    vector<double> table;
    for (int i = 0; i < 21; i++)
        table.push_back(std::sin(0.25*i));
    forceField->addFunction("fn", table, 1.0, 6.0);
    system.addForce(forceField);
    Context context(system, integrator, platform);
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);

    // Sample distances between 0 and 7, which covers both sides of the
    // tabulated range.  These points generally fall between table entries, so
    // the comparison with the analytic function uses loose tolerances.

    for (int i = 1; i < 30; i++) {
        double x = (7.0/30.0)*i;
        positions[1] = Vec3(x, 0, 0);
        context.setPositions(positions);
        State state = context.getState(State::Forces | State::Energy);
        const vector<Vec3>& forces = state.getForces();
        double force = (x < 1.0 || x > 6.0 ? 0.0 : -std::cos(x-1.0));
        double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], 0.1);
        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], 0.1);
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 0.02);
    }

    // Sample exactly at the interior table points (spacing 0.25 starting at
    // 1.0), where the energy is checked with a much tighter tolerance.

    for (int i = 1; i < 20; i++) {
        double x = 0.25*i+1.0;
        positions[1] = Vec3(x, 0, 0);
        context.setPositions(positions);
        State state = context.getState(State::Energy);
        double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 1e-4);
    }
}
/**
 * Compare a CustomNonbondedForce that spells out Coulomb plus Lennard-Jones
 * against the built-in NonbondedForce on 300 two-particle molecules, and
 * require the energies and per-particle forces to agree to 1e-4.
 */
void testCoulombLennardJones() {
    const int numMolecules = 300;
    const int numParticles = numMolecules*2;
    const double boxSize = 20.0;
    CudaPlatform platform;

    // Create two systems: one with a NonbondedForce, and one using a CustomNonbondedForce to implement the same interaction.

    System standardSystem;
    System customSystem;
    for (int p = 0; p < numParticles; p++) {
        standardSystem.addParticle(1.0);
        customSystem.addParticle(1.0);
    }
    NonbondedForce* standardNonbonded = new NonbondedForce();
    CustomNonbondedForce* customNonbonded = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)");
    customNonbonded->addPerParticleParameter("q");
    customNonbonded->addPerParticleParameter("sigma");
    customNonbonded->addPerParticleParameter("eps");
    vector<Vec3> positions(numParticles);
    vector<Vec3> velocities(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<double> params(3);
    for (int molecule = 0; molecule < numMolecules; molecule++) {
        const int first = 2*molecule;
        const int second = 2*molecule+1;

        // The first half of the molecules uses eps=0.1 and the second half eps=0.2;
        // every molecule pairs a (+1, sigma 0.2) particle with a (-1, sigma 0.1) one.

        const double eps = (molecule < numMolecules/2 ? 0.1 : 0.2);
        standardNonbonded->addParticle(1.0, 0.2, eps);
        params[0] = 1.0;
        params[1] = 0.2;
        params[2] = eps;
        customNonbonded->addParticle(params);
        standardNonbonded->addParticle(-1.0, 0.1, eps);
        params[0] = -1.0;
        params[1] = 0.1;
        customNonbonded->addParticle(params);

        // Random location for the first particle; its partner is offset by 1 along x.

        positions[first] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
        positions[second] = Vec3(positions[first][0]+1.0, positions[first][1], positions[first][2]);
        velocities[first] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
        velocities[second] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));

        // Remove the intramolecular interaction from both force definitions.

        standardNonbonded->addException(first, second, 0.0, 1.0, 0.0);
        customNonbonded->addExclusion(first, second);
    }
    standardNonbonded->setNonbondedMethod(NonbondedForce::NoCutoff);
    customNonbonded->setNonbondedMethod(CustomNonbondedForce::NoCutoff);
    standardSystem.addForce(standardNonbonded);
    customSystem.addForce(customNonbonded);
    VerletIntegrator integrator1(0.01);
    VerletIntegrator integrator2(0.01);
    Context context1(standardSystem, integrator1, platform);
    Context context2(customSystem, integrator2, platform);
    context1.setPositions(positions);
    context2.setPositions(positions);
    context1.setVelocities(velocities);
    context2.setVelocities(velocities);
    State state1 = context1.getState(State::Forces | State::Energy);
    State state2 = context2.getState(State::Forces | State::Energy);
    ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4);
    for (int p = 0; p < numParticles; p++)
        ASSERT_EQUAL_VEC(state1.getForces()[p], state2.getForces()[p], 1e-4);
}
void
testParallelComputation
()
{
CudaPlatform
platform
;
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomNonbondedForce
*
force
=
new
CustomNonbondedForce
(
"4*eps*((sigma/r)^12-(sigma/r)^6); sigma=0.5; eps=1"
);
vector
<
double
>
params
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
addParticle
(
params
);
system
.
addForce
(
force
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
5
*
genrand_real2
(
sfmt
),
5
*
genrand_real2
(
sfmt
),
5
*
genrand_real2
(
sfmt
));
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
for
(
int
j
=
0
;
j
<
i
;
++
j
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
delta
.
dot
(
delta
)
<
0.1
)
force
->
addExclusion
(
i
,
j
);
}
VerletIntegrator
integrator1
(
0.01
);
Context
context1
(
system
,
integrator1
,
platform
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
string
deviceIndex
=
platform
.
getPropertyValue
(
context1
,
CudaPlatform
::
CudaDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
/**
 * Run every enabled CustomNonbondedForce test in order.  If a test throws,
 * print the exception's message and return 1; otherwise print "Done" and
 * return 0.
 */
int main() {
    typedef void (*TestCase)();
    const TestCase testCases[] = {
        testSimpleExpression,
        testParameters,
        testManyParameters,
        testExclusions,
        testCutoff,
        testPeriodic,
        testTabulatedFunction,
        testCoulombLennardJones
        // testParallelComputation is currently disabled.
    };
    const int numTestCases = sizeof(testCases)/sizeof(testCases[0]);
    try {
        for (int index = 0; index < numTestCases; ++index)
            testCases[index]();
    }
    catch (const exception& e) {
        cout << "exception: " << e.what() << endl;
        return 1;
    }
    cout << "Done" << endl;
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment