Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bd22eada
Commit
bd22eada
authored
Jun 20, 2012
by
Peter Eastman
Browse files
Continuing to implement new CUDA platform: CustomNonbondedForce, CustomHbondForce, CustomIntegrator
parent
8eb6850d
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
3519 additions
and
1697 deletions
+3519
-1697
platforms/cuda2/src/CudaArray.cpp
platforms/cuda2/src/CudaArray.cpp
+1
-1
platforms/cuda2/src/CudaArray.h
platforms/cuda2/src/CudaArray.h
+2
-2
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+4
-0
platforms/cuda2/src/CudaKernelFactory.cpp
platforms/cuda2/src/CudaKernelFactory.cpp
+6
-6
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+1532
-1460
platforms/cuda2/src/CudaKernels.h
platforms/cuda2/src/CudaKernels.h
+187
-186
platforms/cuda2/src/CudaParameterSet.cpp
platforms/cuda2/src/CudaParameterSet.cpp
+56
-37
platforms/cuda2/src/CudaParameterSet.h
platforms/cuda2/src/CudaParameterSet.h
+7
-5
platforms/cuda2/src/kernels/customHbondForce.cu
platforms/cuda2/src/kernels/customHbondForce.cu
+241
-0
platforms/cuda2/src/kernels/customIntegrator.cu
platforms/cuda2/src/kernels/customIntegrator.cu
+70
-0
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
+4
-0
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
+35
-0
platforms/cuda2/src/kernels/customNonbonded.cu
platforms/cuda2/src/kernels/customNonbonded.cu
+9
-0
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
+249
-0
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
+681
-0
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
+435
-0
No files found.
platforms/cuda2/src/CudaArray.cpp
View file @
bd22eada
...
@@ -53,7 +53,7 @@ CudaArray::~CudaArray() {
...
@@ -53,7 +53,7 @@ CudaArray::~CudaArray() {
}
}
}
}
void
CudaArray
::
upload
(
void
*
data
,
bool
blocking
)
{
void
CudaArray
::
upload
(
const
void
*
data
,
bool
blocking
)
{
CUresult
result
;
CUresult
result
;
if
(
blocking
)
if
(
blocking
)
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
result
=
cuMemcpyHtoD
(
pointer
,
data
,
size
*
elementSize
);
...
...
platforms/cuda2/src/CudaArray.h
View file @
bd22eada
...
@@ -94,7 +94,7 @@ public:
...
@@ -94,7 +94,7 @@ public:
* Copy the values in a vector to the device memory.
* Copy the values in a vector to the device memory.
*/
*/
template
<
class
T
>
template
<
class
T
>
void
upload
(
std
::
vector
<
T
>&
data
)
{
void
upload
(
const
std
::
vector
<
T
>&
data
)
{
if
(
sizeof
(
T
)
!=
elementSize
||
data
.
size
()
!=
size
)
if
(
sizeof
(
T
)
!=
elementSize
||
data
.
size
()
!=
size
)
throw
OpenMMException
(
"Error uploading array "
+
name
+
": The specified vector does not match the size of the array"
);
throw
OpenMMException
(
"Error uploading array "
+
name
+
": The specified vector does not match the size of the array"
);
upload
(
&
data
[
0
],
true
);
upload
(
&
data
[
0
],
true
);
...
@@ -117,7 +117,7 @@ public:
...
@@ -117,7 +117,7 @@ public:
* @param blocking if true, this call will block until the transfer is complete. If false,
* @param blocking if true, this call will block until the transfer is complete. If false,
* the source array must be in page-locked memory.
* the source array must be in page-locked memory.
*/
*/
void
upload
(
void
*
data
,
bool
blocking
=
true
);
void
upload
(
const
void
*
data
,
bool
blocking
=
true
);
/**
/**
* Copy the values in the device memory to an array.
* Copy the values in the device memory to an array.
*
*
...
...
platforms/cuda2/src/CudaContext.cpp
View file @
bd22eada
...
@@ -945,6 +945,10 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -945,6 +945,10 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
reorderListeners
[
i
]
->
execute
();
reorderListeners
[
i
]
->
execute
();
}
}
void
CudaContext
::
addReorderListener
(
ReorderListener
*
listener
)
{
reorderListeners
.
push_back
(
listener
);
}
struct
CudaContext
::
WorkThread
::
ThreadData
{
struct
CudaContext
::
WorkThread
::
ThreadData
{
ThreadData
(
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
ThreadData
(
std
::
queue
<
CudaContext
::
WorkTask
*>&
tasks
,
bool
&
waiting
,
bool
&
finished
,
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
pthread_mutex_t
&
queueLock
,
pthread_cond_t
&
waitForTaskCondition
,
pthread_cond_t
&
queueEmptyCondition
)
:
...
...
platforms/cuda2/src/CudaKernelFactory.cpp
View file @
bd22eada
...
@@ -94,16 +94,16 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -94,16 +94,16 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcCustomTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomTorsionForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
//
if (name == CalcCustomNonbondedForceKernel::Name())
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
//
return new CudaCalcCustomNonbondedForceKernel(name, platform, cu, context.getSystem());
return
new
CudaCalcCustomNonbondedForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
// if (name == CalcGBSAOBCForceKernel::Name())
// if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// return new CudaCalcGBSAOBCForceKernel(name, platform, cu);
// if (name == CalcCustomGBForceKernel::Name())
// if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
// return new CudaCalcCustomGBForceKernel(name, platform, cu, context.getSystem());
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
return
new
CudaCalcCustomExternalForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomExternalForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
//
if (name == CalcCustomHbondForceKernel::Name())
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
//
return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem());
return
new
CudaCalcCustomHbondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
...
@@ -116,8 +116,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -116,8 +116,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaIntegrateVariableVerletStepKernel
(
name
,
platform
,
cu
);
return
new
CudaIntegrateVariableVerletStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
IntegrateVariableLangevinStepKernel
::
Name
())
if
(
name
==
IntegrateVariableLangevinStepKernel
::
Name
())
return
new
CudaIntegrateVariableLangevinStepKernel
(
name
,
platform
,
cu
);
return
new
CudaIntegrateVariableLangevinStepKernel
(
name
,
platform
,
cu
);
//
if (name == IntegrateCustomStepKernel::Name())
if
(
name
==
IntegrateCustomStepKernel
::
Name
())
//
return new CudaIntegrateCustomStepKernel(name, platform, cu);
return
new
CudaIntegrateCustomStepKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
if
(
name
==
ApplyAndersenThermostatKernel
::
Name
())
return
new
CudaApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
return
new
CudaApplyAndersenThermostatKernel
(
name
,
platform
,
cu
);
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
if
(
name
==
ApplyMonteCarloBarostatKernel
::
Name
())
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
bd22eada
...
@@ -1669,277 +1669,277 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
...
@@ -1669,277 +1669,277 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
cu
.
invalidateMolecules
();
cu
.
invalidateMolecules
();
}
}
//class CudaCustomNonbondedForceInfo : public CudaForceInfo {
class
CudaCustomNonbondedForceInfo
:
public
CudaForceInfo
{
public:
CudaCustomNonbondedForceInfo
(
const
CustomNonbondedForce
&
force
)
:
force
(
force
)
{
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
vector
<
double
>
params1
;
vector
<
double
>
params2
;
force
.
getParticleParameters
(
particle1
,
params1
);
force
.
getParticleParameters
(
particle2
,
params2
);
for
(
int
i
=
0
;
i
<
(
int
)
params1
.
size
();
i
++
)
if
(
params1
[
i
]
!=
params2
[
i
])
return
false
;
return
true
;
}
int
getNumParticleGroups
()
{
return
force
.
getNumExclusions
();
}
void
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
int
particle1
,
particle2
;
force
.
getExclusionParticles
(
index
,
particle1
,
particle2
);
particles
.
resize
(
2
);
particles
[
0
]
=
particle1
;
particles
[
1
]
=
particle2
;
}
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
return
true
;
}
private:
const
CustomNonbondedForce
&
force
;
};
CudaCalcCustomNonbondedForceKernel
::~
CudaCalcCustomNonbondedForceKernel
()
{
cu
.
setAsCurrent
();
if
(
params
!=
NULL
)
delete
params
;
if
(
globals
!=
NULL
)
delete
globals
;
if
(
tabulatedFunctionParams
!=
NULL
)
delete
tabulatedFunctionParams
;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
delete
tabulatedFunctions
[
i
];
}
void
CudaCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
cu
.
setAsCurrent
();
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
string
prefix
=
"custom"
+
cu
.
intToString
(
forceIndex
)
+
"_"
;
// Record parameters and exclusions.
int
numParticles
=
force
.
getNumParticles
();
params
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerParticleParameters
(),
numParticles
,
"customNonbondedParameters"
);
if
(
force
.
getNumGlobalParameters
()
>
0
)
globals
=
CudaArray
::
create
<
float
>
(
cu
,
force
.
getNumGlobalParameters
(),
"customNonbondedGlobals"
);
vector
<
vector
<
float
>
>
paramVector
(
numParticles
);
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
vector
<
double
>
parameters
;
force
.
getParticleParameters
(
i
,
parameters
);
paramVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
paramVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
exclusionList
[
i
].
push_back
(
i
);
}
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
int
particle1
,
particle2
;
force
.
getExclusionParticles
(
i
,
particle1
,
particle2
);
exclusionList
[
particle1
].
push_back
(
particle2
);
exclusionList
[
particle2
].
push_back
(
particle1
);
}
params
->
setParameterValues
(
paramVector
);
// Record the tabulated functions.
CudaExpressionUtilities
::
FunctionPlaceholder
fp
;
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
float4
>
tabulatedFunctionParamsVec
(
force
.
getNumFunctions
());
for
(
int
i
=
0
;
i
<
force
.
getNumFunctions
();
i
++
)
{
string
name
;
vector
<
double
>
values
;
double
min
,
max
;
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
string
arrayName
=
prefix
+
"table"
+
cu
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
&
fp
;
tabulatedFunctionParamsVec
[
i
]
=
make_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
vector
<
float4
>
f
=
cu
.
getExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float4
>
(
cu
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDevicePointer
()));
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
CudaArray
::
create
<
float4
>
(
cu
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float"
,
4
,
sizeof
(
float4
),
tabulatedFunctionParams
->
getDevicePointer
()));
}
// Record information for the expressions.
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
if
(
globals
!=
NULL
)
globals
->
upload
(
globalParamValues
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
CutoffNonPeriodic
);
Lepton
::
ParsedExpression
energyExpression
=
Lepton
::
Parser
::
parse
(
force
.
getEnergyFunction
(),
functions
).
optimize
();
Lepton
::
ParsedExpression
forceExpression
=
energyExpression
.
differentiate
(
"r"
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
forceExpressions
;
forceExpressions
[
"tempEnergy += "
]
=
energyExpression
;
forceExpressions
[
"tempForce -= "
]
=
forceExpression
;
// Create the kernels.
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variables
;
ExpressionTreeNode
rnode
(
new
Operation
::
Variable
(
"r"
));
variables
.
push_back
(
make_pair
(
rnode
,
"r"
));
variables
.
push_back
(
make_pair
(
ExpressionTreeNode
(
new
Operation
::
Square
(),
rnode
),
"r2"
));
variables
.
push_back
(
make_pair
(
ExpressionTreeNode
(
new
Operation
::
Reciprocal
(),
rnode
),
"invR"
));
for
(
int
i
=
0
;
i
<
force
.
getNumPerParticleParameters
();
i
++
)
{
const
string
&
name
=
force
.
getPerParticleParameterName
(
i
);
variables
.
push_back
(
makeVariable
(
name
+
"1"
,
prefix
+
"params"
+
params
->
getParameterSuffix
(
i
,
"1"
)));
variables
.
push_back
(
makeVariable
(
name
+
"2"
,
prefix
+
"params"
+
params
->
getParameterSuffix
(
i
,
"2"
)));
}
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
.
push_back
(
makeVariable
(
name
,
prefix
+
value
));
}
stringstream
compute
;
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
prefix
+
"temp"
,
prefix
+
"functionParams"
);
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbonded
,
replacements
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cu
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
}
cu
.
addForce
(
new
CudaCustomNonbondedForceInfo
(
force
));
}
double
CudaCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
globals
!=
NULL
)
{
bool
changed
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
globalParamNames
[
i
]);
if
(
value
!=
globalParamValues
[
i
])
changed
=
true
;
globalParamValues
[
i
]
=
value
;
}
if
(
changed
)
globals
->
upload
(
globalParamValues
);
}
return
0.0
;
}
void
CudaCalcCustomNonbondedForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
)
{
cu
.
setAsCurrent
();
int
numParticles
=
force
.
getNumParticles
();
if
(
numParticles
!=
cu
.
getNumAtoms
())
throw
OpenMMException
(
"updateParametersInContext: The number of particles has changed"
);
// Record the per-particle parameters.
vector
<
vector
<
float
>
>
paramVector
(
numParticles
);
vector
<
double
>
parameters
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
force
.
getParticleParameters
(
i
,
parameters
);
paramVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
paramVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
}
params
->
setParameterValues
(
paramVector
);
// Mark that the current reordering may be invalid.
cu
.
invalidateMolecules
();
}
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
//public:
// Cuda
CustomNonbonded
ForceInfo(int requiredBuffers, const
CustomNonbonded
Force& force) : CudaForceInfo(requiredBuffers), force(force) {
// Cuda
GBSAOBC
ForceInfo(int requiredBuffers, const
GBSAOBC
Force& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// bool areParticlesIdentical(int particle1, int particle2) {
// vector<double> params1;
// double charge1, charge2, radius1, radius2, scale1, scale2;
// vector<double> params2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle1, params1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// force.getParticleParameters(particle2, params2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// for (int i = 0; i < (int) params1.size(); i++)
// if (params1[i] != params2[i])
// return false;
// return true;
// }
// int getNumParticleGroups() {
// return force.getNumExclusions();
// }
// void getParticlesInGroup(int index, vector<int>& particles) {
// int particle1, particle2;
// force.getExclusionParticles(index, particle1, particle2);
// particles.resize(2);
// particles[0] = particle1;
// particles[1] = particle2;
// }
// bool areGroupsIdentical(int group1, int group2) {
// return true;
// }
// }
//private:
//private:
// const
CustomNonbonded
Force& force;
// const
GBSAOBC
Force& force;
//};
//};
//
//
//CudaCalc
CustomNonbonded
ForceKernel::~CudaCalc
CustomNonbonded
ForceKernel() {
//CudaCalc
GBSAOBC
ForceKernel::~CudaCalc
GBSAOBC
ForceKernel() {
// cu.setAsCurrent();
// cu.setAsCurrent();
// if (params != NULL)
// if (params != NULL)
// delete params;
// delete params;
// if (globals != NULL)
// if (bornSum != NULL)
// delete globals;
// delete bornSum;
// if (tabulatedFunctionParams != NULL)
// if (longBornSum != NULL)
// delete tabulatedFunctionParams;
// delete longBornSum;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// if (bornRadii != NULL)
// delete tabulatedFunctions[i];
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//}
//
//
//void CudaCalc
CustomNonbonded
ForceKernel::initialize(const System& system, const
CustomNonbonded
Force& force) {
//void CudaCalc
GBSAOBC
ForceKernel::initialize(const System& system, const
GBSAOBC
Force& force) {
// cu.setAsCurrent();
// cu.setAsCurrent();
// int forceIndex;
// if (cu.getPlatformData().contexts.size() > 1)
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// ;
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// string prefix = "custom"+cu.intToString(forceIndex)+"_";
// params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
//
// bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// // Record parameters and exclusions.
// obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
//
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// int numParticles = force.getNumParticles();
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
// vector<mm_float2> paramsVector(numParticles);
// if (force.getNumGlobalParameters() > 0)
// const double dielectricOffset = 0.009;
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
// vector<vector<cl_float> > paramVector(numParticles);
// vector<vector<int> > exclusionList(numParticles);
// for (int i = 0; i < numParticles; i++) {
// for (int i = 0; i < numParticles; i++) {
// vector<double> parameters;
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, parameters);
// force.getParticleParameters(i, charge, radius, scalingFactor);
// paramVector[i].resize(parameters.size());
// radius -= dielectricOffset;
// for (int j = 0; j < (int) parameters.size(); j++)
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// paramVector[i][j] = (cl_float) parameters[j];
// posq[i].w = (float) charge;
// exclusionList[i].push_back(i);
// }
// for (int i = 0; i < force.getNumExclusions(); i++) {
// int particle1, particle2;
// force.getExclusionParticles(i, particle1, particle2);
// exclusionList[particle1].push_back(particle2);
// exclusionList[particle2].push_back(particle1);
// }
// }
// params->setParameterValues(paramVector);
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
//
// // Record the tabulated functions.
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
//
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// CudaExpressionUtilities::FunctionPlaceholder fp;
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// map<string, Lepton::CustomFunction*> functions;
// if (!hasCreatedKernels) {
// vector<pair<string, string> > functionDefinitions;
// // These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
// for (int i = 0; i < force.getNumFunctions(); i++) {
// string name;
// vector<double> values;
// double min, max;
// force.getFunctionParameters(i, name, values, min, max);
// string arrayName = prefix+"table"+cu.intToString(i);
// functionDefinitions.push_back(make_pair(name, arrayName));
// functions[name] = &fp;
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction"));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
// }
// if (force.getNumFunctions() > 0) {
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDevicePointer()));
// }
//
// // Record information for the expressions.
//
// globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamNames[i] = force.getGlobalParameterName(i);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
// }
// if (globals != NULL)
// globals->upload(globalParamValues);
// bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
// map<string, Lepton::ParsedExpression> forceExpressions;
// forceExpressions["tempEnergy += "] = energyExpression;
// forceExpressions["tempForce -= "] = forceExpression;
//
// // Create the kernels.
//
// vector<pair<ExpressionTreeNode, string> > variables;
// ExpressionTreeNode rnode(new Operation::Variable("r"));
// variables.push_back(make_pair(rnode, "r"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// const string& name = force.getPerParticleParameterName(i);
// variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
// variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
// }
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// const string& name = force.getGlobalParameterName(i);
// string value = "globals["+cu.intToString(i)+"]";
// variables.push_back(makeVariable(name, prefix+value));
// }
// stringstream compute;
// compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
// map<string, string> replacements;
// replacements["COMPUTE_FORCE"] = compute.str();
// string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// }
// if (globals != NULL) {
// globals->upload(globalParamValues);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDevicePointer()));
// }
// cu.addForce(new CudaCustomNonbondedForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force));
//}
//
//double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// return 0.0;
//}
//
//void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
// cu.setAsCurrent();
// int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
//
// // Record the per-particle parameters.
//
// vector<vector<cl_float> > paramVector(numParticles);
// vector<double> parameters;
// for (int i = 0; i < numParticles; i++) {
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// }
// params->setParameterValues(paramVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
//
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
// CudaGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const GBSAOBCForce& force;
//};
//
//CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// cu.setAsCurrent();
// if (params != NULL)
// delete params;
// if (bornSum != NULL)
// delete bornSum;
// if (longBornSum != NULL)
// delete longBornSum;
// if (bornRadii != NULL)
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//
//void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// cu.setAsCurrent();
// if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// params = new CudaArray<mm_float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// bornRadii = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
// obcChain = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "obcChain");
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray<cl_long>(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray<cl_float>(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray<mm_float4>& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// vector<mm_float2> paramsVector(numParticles);
// const double dielectricOffset = 0.009;
// for (int i = 0; i < numParticles; i++) {
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// posq[i].w = (float) charge;
// }
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool deviceIsCpu = (cu.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
// if (!hasCreatedKernels) {
// // These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
//
//
// hasCreatedKernels = true;
// hasCreatedKernels = true;
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
...
@@ -3172,557 +3172,524 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
...
@@ -3172,557 +3172,524 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
cu
.
invalidateMolecules
();
cu
.
invalidateMolecules
();
}
}
//class CudaCustomHbondForceInfo : public CudaForceInfo {
class
CudaCustomHbondForceInfo
:
public
CudaForceInfo
{
//public:
public:
// CudaCustomHbondForceInfo(int requiredBuffers, const CustomHbondForce& force) : CudaForceInfo(requiredBuffers), force(force) {
CudaCustomHbondForceInfo
(
const
CustomHbondForce
&
force
)
:
force
(
force
)
{
// }
}
// bool areParticlesIdentical(int particle1, int particle2) {
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
// return true;
return
true
;
// }
}
// int getNumParticleGroups() {
int
getNumParticleGroups
()
{
// return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions();
return
force
.
getNumDonors
()
+
force
.
getNumAcceptors
()
+
force
.
getNumExclusions
();
// }
}
// void getParticlesInGroup(int index, vector<int>& particles) {
void
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
// int p1, p2, p3;
int
p1
,
p2
,
p3
;
// vector<double> parameters;
vector
<
double
>
parameters
;
// if (index < force.getNumDonors()) {
if
(
index
<
force
.
getNumDonors
())
{
// force.getDonorParameters(index, p1, p2, p3, parameters);
force
.
getDonorParameters
(
index
,
p1
,
p2
,
p3
,
parameters
);
// particles.clear();
particles
.
clear
();
// particles.push_back(p1);
particles
.
push_back
(
p1
);
// if (p2 > -1)
if
(
p2
>
-
1
)
// particles.push_back(p2);
particles
.
push_back
(
p2
);
// if (p3 > -1)
if
(
p3
>
-
1
)
// particles.push_back(p3);
particles
.
push_back
(
p3
);
// return;
return
;
// }
}
// index -= force.getNumDonors();
index
-=
force
.
getNumDonors
();
// if (index < force.getNumAcceptors()) {
if
(
index
<
force
.
getNumAcceptors
())
{
// force.getAcceptorParameters(index, p1, p2, p3, parameters);
force
.
getAcceptorParameters
(
index
,
p1
,
p2
,
p3
,
parameters
);
// particles.clear();
particles
.
clear
();
// particles.push_back(p1);
particles
.
push_back
(
p1
);
// if (p2 > -1)
if
(
p2
>
-
1
)
// particles.push_back(p2);
particles
.
push_back
(
p2
);
// if (p3 > -1)
if
(
p3
>
-
1
)
// particles.push_back(p3);
particles
.
push_back
(
p3
);
// return;
return
;
// }
}
// index -= force.getNumAcceptors();
index
-=
force
.
getNumAcceptors
();
// int donor, acceptor;
int
donor
,
acceptor
;
// force.getExclusionParticles(index, donor, acceptor);
force
.
getExclusionParticles
(
index
,
donor
,
acceptor
);
// particles.clear();
particles
.
clear
();
// force.getDonorParameters(donor, p1, p2, p3, parameters);
force
.
getDonorParameters
(
donor
,
p1
,
p2
,
p3
,
parameters
);
// particles.push_back(p1);
particles
.
push_back
(
p1
);
// if (p2 > -1)
if
(
p2
>
-
1
)
// particles.push_back(p2);
particles
.
push_back
(
p2
);
// if (p3 > -1)
if
(
p3
>
-
1
)
// particles.push_back(p3);
particles
.
push_back
(
p3
);
// force.getAcceptorParameters(acceptor, p1, p2, p3, parameters);
force
.
getAcceptorParameters
(
acceptor
,
p1
,
p2
,
p3
,
parameters
);
// particles.push_back(p1);
particles
.
push_back
(
p1
);
// if (p2 > -1)
if
(
p2
>
-
1
)
// particles.push_back(p2);
particles
.
push_back
(
p2
);
// if (p3 > -1)
if
(
p3
>
-
1
)
// particles.push_back(p3);
particles
.
push_back
(
p3
);
// }
}
// bool areGroupsIdentical(int group1, int group2) {
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
// int p1, p2, p3;
int
p1
,
p2
,
p3
;
// vector<double> params1, params2;
vector
<
double
>
params1
,
params2
;
// if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) {
if
(
group1
<
force
.
getNumDonors
()
&&
group2
<
force
.
getNumDonors
())
{
// force.getDonorParameters(group1, p1, p2, p3, params1);
force
.
getDonorParameters
(
group1
,
p1
,
p2
,
p3
,
params1
);
// force.getDonorParameters(group2, p1, p2, p3, params2);
force
.
getDonorParameters
(
group2
,
p1
,
p2
,
p3
,
params2
);
// return (params1 == params2 && params1 == params2);
return
(
params1
==
params2
&&
params1
==
params2
);
// }
}
// if (group1 < force.getNumDonors() || group2 < force.getNumDonors())
if
(
group1
<
force
.
getNumDonors
()
||
group2
<
force
.
getNumDonors
())
// return false;
return
false
;
// group1 -= force.getNumDonors();
group1
-=
force
.
getNumDonors
();
// group2 -= force.getNumDonors();
group2
-=
force
.
getNumDonors
();
// if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) {
if
(
group1
<
force
.
getNumAcceptors
()
&&
group2
<
force
.
getNumAcceptors
())
{
// force.getAcceptorParameters(group1, p1, p2, p3, params1);
force
.
getAcceptorParameters
(
group1
,
p1
,
p2
,
p3
,
params1
);
// force.getAcceptorParameters(group2, p1, p2, p3, params2);
force
.
getAcceptorParameters
(
group2
,
p1
,
p2
,
p3
,
params2
);
// return (params1 == params2 && params1 == params2);
return
(
params1
==
params2
&&
params1
==
params2
);
// }
}
// if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors())
if
(
group1
<
force
.
getNumAcceptors
()
||
group2
<
force
.
getNumAcceptors
())
// return false;
return
false
;
// return true;
return
true
;
// }
}
//private:
private:
// const CustomHbondForce& force;
const
CustomHbondForce
&
force
;
//};
};
//
//CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
CudaCalcCustomHbondForceKernel
::~
CudaCalcCustomHbondForceKernel
()
{
// cu.setAsCurrent();
cu
.
setAsCurrent
();
// if (donorParams != NULL)
if
(
donorParams
!=
NULL
)
// delete donorParams;
delete
donorParams
;
// if (acceptorParams != NULL)
if
(
acceptorParams
!=
NULL
)
// delete acceptorParams;
delete
acceptorParams
;
// if (donors != NULL)
if
(
donors
!=
NULL
)
// delete donors;
delete
donors
;
// if (acceptors != NULL)
if
(
acceptors
!=
NULL
)
// delete acceptors;
delete
acceptors
;
// if (donorBufferIndices != NULL)
if
(
globals
!=
NULL
)
// delete donorBufferIndices;
delete
globals
;
// if (acceptorBufferIndices != NULL)
if
(
donorExclusions
!=
NULL
)
// delete acceptorBufferIndices;
delete
donorExclusions
;
// if (globals != NULL)
if
(
acceptorExclusions
!=
NULL
)
// delete globals;
delete
acceptorExclusions
;
// if (donorExclusions != NULL)
if
(
tabulatedFunctionParams
!=
NULL
)
// delete donorExclusions;
delete
tabulatedFunctionParams
;
// if (acceptorExclusions != NULL)
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
// delete acceptorExclusions;
delete
tabulatedFunctions
[
i
];
// if (tabulatedFunctionParams != NULL)
}
// delete tabulatedFunctionParams;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
static
void
addDonorAndAcceptorCode
(
stringstream
&
computeDonor
,
stringstream
&
computeAcceptor
,
const
string
&
value
)
{
// delete tabulatedFunctions[i];
computeDonor
<<
value
;
//}
computeAcceptor
<<
value
;
//
}
//static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
// computeDonor << value;
static
void
applyDonorAndAcceptorForces
(
stringstream
&
applyToDonor
,
stringstream
&
applyToAcceptor
,
int
atom
,
const
string
&
value
)
{
// computeAcceptor << value;
string
forceNames
[]
=
{
"f1"
,
"f2"
,
"f3"
};
//}
if
(
atom
<
3
)
//
applyToAcceptor
<<
forceNames
[
atom
]
<<
" += trim("
<<
value
<<
");
\n
"
;
//static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) {
else
// string forceNames[] = {"f1", "f2", "f3"};
applyToDonor
<<
forceNames
[
atom
-
3
]
<<
" += trim("
<<
value
<<
");
\n
"
;
// if (atom < 3)
}
// applyToAcceptor << forceNames[atom]<<".xyz += "<<value<<";\n";
// else
void
CudaCalcCustomHbondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
)
{
// applyToDonor << forceNames[atom-3]<<".xyz += "<<value<<";\n";
// Record the lists of donors and acceptors, and the parameters for each one.
//}
//
cu
.
setAsCurrent
();
//void CudaCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
// // Record the lists of donors and acceptors, and the parameters for each one.
int
startIndex
=
cu
.
getContextIndex
()
*
force
.
getNumDonors
()
/
numContexts
;
//
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
force
.
getNumDonors
()
/
numContexts
;
// cu.setAsCurrent();
numDonors
=
endIndex
-
startIndex
;
// int numContexts = cu.getPlatformData().contexts.size();
numAcceptors
=
force
.
getNumAcceptors
();
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
if
(
numDonors
==
0
||
numAcceptors
==
0
)
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
return
;
// numDonors = endIndex-startIndex;
int
numParticles
=
system
.
getNumParticles
();
// numAcceptors = force.getNumAcceptors();
donors
=
CudaArray
::
create
<
int4
>
(
cu
,
numDonors
,
"customHbondDonors"
);
// if (numDonors == 0 || numAcceptors == 0)
acceptors
=
CudaArray
::
create
<
int4
>
(
cu
,
numAcceptors
,
"customHbondAcceptors"
);
// return;
donorParams
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerDonorParameters
(),
numDonors
,
"customHbondDonorParameters"
);
// int numParticles = system.getNumParticles();
acceptorParams
=
new
CudaParameterSet
(
cu
,
force
.
getNumPerAcceptorParameters
(),
numAcceptors
,
"customHbondAcceptorParameters"
);
// donors = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonors");
if
(
force
.
getNumGlobalParameters
()
>
0
)
// acceptors = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptors");
globals
=
CudaArray
::
create
<
float
>
(
cu
,
force
.
getNumGlobalParameters
(),
"customHbondGlobals"
);
// donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
vector
<
vector
<
float
>
>
donorParamVector
(
numDonors
);
// acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
vector
<
int4
>
donorVector
(
numDonors
);
// if (force.getNumGlobalParameters() > 0)
for
(
int
i
=
0
;
i
<
numDonors
;
i
++
)
{
// globals = new CudaArray<cl_float>(cu, force.getNumGlobalParameters(), "customHbondGlobals", false, CL_MEM_READ_ONLY);
vector
<
double
>
parameters
;
// vector<vector<cl_float> > donorParamVector(numDonors);
force
.
getDonorParameters
(
startIndex
+
i
,
donorVector
[
i
].
x
,
donorVector
[
i
].
y
,
donorVector
[
i
].
z
,
parameters
);
// vector<mm_int4> donorVector(numDonors);
donorParamVector
[
i
].
resize
(
parameters
.
size
());
// for (int i = 0; i < numDonors; i++) {
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
// vector<double> parameters;
donorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
// force.getDonorParameters(startIndex+i, donorVector[i].x, donorVector[i].y, donorVector[i].z, parameters);
}
// donorParamVector[i].resize(parameters.size());
donors
->
upload
(
donorVector
);
// for (int j = 0; j < (int) parameters.size(); j++)
donorParams
->
setParameterValues
(
donorParamVector
);
// donorParamVector[i][j] = (cl_float) parameters[j];
vector
<
vector
<
float
>
>
acceptorParamVector
(
numAcceptors
);
// }
vector
<
int4
>
acceptorVector
(
numAcceptors
);
// donors->upload(donorVector);
for
(
int
i
=
0
;
i
<
numAcceptors
;
i
++
)
{
// donorParams->setParameterValues(donorParamVector);
vector
<
double
>
parameters
;
// vector<vector<cl_float> > acceptorParamVector(numAcceptors);
force
.
getAcceptorParameters
(
i
,
acceptorVector
[
i
].
x
,
acceptorVector
[
i
].
y
,
acceptorVector
[
i
].
z
,
parameters
);
// vector<mm_int4> acceptorVector(numAcceptors);
acceptorParamVector
[
i
].
resize
(
parameters
.
size
());
// for (int i = 0; i < numAcceptors; i++) {
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
// vector<double> parameters;
acceptorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
// force.getAcceptorParameters(i, acceptorVector[i].x, acceptorVector[i].y, acceptorVector[i].z, parameters);
}
// acceptorParamVector[i].resize(parameters.size());
acceptors
->
upload
(
acceptorVector
);
// for (int j = 0; j < (int) parameters.size(); j++)
acceptorParams
->
setParameterValues
(
acceptorParamVector
);
// acceptorParamVector[i][j] = (cl_float) parameters[j];
cu
.
addForce
(
new
CudaCustomHbondForceInfo
(
force
));
// }
// acceptors->upload(acceptorVector);
// Record exclusions.
// acceptorParams->setParameterValues(acceptorParamVector);
//
vector
<
int4
>
donorExclusionVector
(
numDonors
,
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
));
// // Select an output buffer index for each donor and acceptor.
vector
<
int4
>
acceptorExclusionVector
(
numAcceptors
,
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
));
//
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
// donorBufferIndices = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorBuffers");
int
donor
,
acceptor
;
// acceptorBufferIndices = new CudaArray<mm_int4>(cu, numAcceptors, "customHbondAcceptorBuffers");
force
.
getExclusionParticles
(
i
,
donor
,
acceptor
);
// vector<mm_int4> donorBufferVector(numDonors);
if
(
donor
<
startIndex
||
donor
>=
endIndex
)
// vector<mm_int4> acceptorBufferVector(numAcceptors);
continue
;
// vector<int> donorBufferCounter(numParticles, 0);
donor
-=
startIndex
;
// for (int i = 0; i < numDonors; i++)
if
(
donorExclusionVector
[
donor
].
x
==
-
1
)
// donorBufferVector[i] = mm_int4(donorVector[i].x > -1 ? donorBufferCounter[donorVector[i].x]++ : 0,
donorExclusionVector
[
donor
].
x
=
acceptor
;
// donorVector[i].y > -1 ? donorBufferCounter[donorVector[i].y]++ : 0,
else
if
(
donorExclusionVector
[
donor
].
y
==
-
1
)
// donorVector[i].z > -1 ? donorBufferCounter[donorVector[i].z]++ : 0, 0);
donorExclusionVector
[
donor
].
y
=
acceptor
;
// vector<int> acceptorBufferCounter(numParticles, 0);
else
if
(
donorExclusionVector
[
donor
].
z
==
-
1
)
// for (int i = 0; i < numAcceptors; i++)
donorExclusionVector
[
donor
].
z
=
acceptor
;
// acceptorBufferVector[i] = mm_int4(acceptorVector[i].x > -1 ? acceptorBufferCounter[acceptorVector[i].x]++ : 0,
else
if
(
donorExclusionVector
[
donor
].
w
==
-
1
)
// acceptorVector[i].y > -1 ? acceptorBufferCounter[acceptorVector[i].y]++ : 0,
donorExclusionVector
[
donor
].
w
=
acceptor
;
// acceptorVector[i].z > -1 ? acceptorBufferCounter[acceptorVector[i].z]++ : 0, 0);
else
// donorBufferIndices->upload(donorBufferVector);
throw
OpenMMException
(
"CustomHbondForce: CudaPlatform does not support more than four exclusions per donor"
);
// acceptorBufferIndices->upload(acceptorBufferVector);
if
(
acceptorExclusionVector
[
acceptor
].
x
==
-
1
)
// int maxBuffers = 1;
acceptorExclusionVector
[
acceptor
].
x
=
donor
;
// for (int i = 0; i < (int) donorBufferCounter.size(); i++)
else
if
(
acceptorExclusionVector
[
acceptor
].
y
==
-
1
)
// maxBuffers = max(maxBuffers, donorBufferCounter[i]);
acceptorExclusionVector
[
acceptor
].
y
=
donor
;
// for (int i = 0; i < (int) acceptorBufferCounter.size(); i++)
else
if
(
acceptorExclusionVector
[
acceptor
].
z
==
-
1
)
// maxBuffers = max(maxBuffers, acceptorBufferCounter[i]);
acceptorExclusionVector
[
acceptor
].
z
=
donor
;
// cu.addForce(new CudaCustomHbondForceInfo(maxBuffers, force));
else
if
(
acceptorExclusionVector
[
acceptor
].
w
==
-
1
)
//
acceptorExclusionVector
[
acceptor
].
w
=
donor
;
// // Record exclusions.
else
//
throw
OpenMMException
(
"CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor"
);
// vector<mm_int4> donorExclusionVector(numDonors, mm_int4(-1, -1, -1, -1));
}
// vector<mm_int4> acceptorExclusionVector(numAcceptors, mm_int4(-1, -1, -1, -1));
donorExclusions
=
CudaArray
::
create
<
int4
>
(
cu
,
numDonors
,
"customHbondDonorExclusions"
);
// for (int i = 0; i < force.getNumExclusions(); i++) {
acceptorExclusions
=
CudaArray
::
create
<
int4
>
(
cu
,
numAcceptors
,
"customHbondAcceptorExclusions"
);
// int donor, acceptor;
donorExclusions
->
upload
(
donorExclusionVector
);
// force.getExclusionParticles(i, donor, acceptor);
acceptorExclusions
->
upload
(
acceptorExclusionVector
);
// if (donor < startIndex || donor >= endIndex)
// continue;
// Record the tabulated functions.
// donor -= startIndex;
// if (donorExclusionVector[donor].x == -1)
CudaExpressionUtilities
::
FunctionPlaceholder
fp
;
// donorExclusionVector[donor].x = acceptor;
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
// else if (donorExclusionVector[donor].y == -1)
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
// donorExclusionVector[donor].y = acceptor;
vector
<
float4
>
tabulatedFunctionParamsVec
(
force
.
getNumFunctions
());
// else if (donorExclusionVector[donor].z == -1)
stringstream
tableArgs
;
// donorExclusionVector[donor].z = acceptor;
for
(
int
i
=
0
;
i
<
force
.
getNumFunctions
();
i
++
)
{
// else if (donorExclusionVector[donor].w == -1)
string
name
;
// donorExclusionVector[donor].w = acceptor;
vector
<
double
>
values
;
// else
double
min
,
max
;
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per donor");
force
.
getFunctionParameters
(
i
,
name
,
values
,
min
,
max
);
// if (acceptorExclusionVector[acceptor].x == -1)
string
arrayName
=
"table"
+
cu
.
intToString
(
i
);
// acceptorExclusionVector[acceptor].x = donor;
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
// else if (acceptorExclusionVector[acceptor].y == -1)
functions
[
name
]
=
&
fp
;
// acceptorExclusionVector[acceptor].y = donor;
tabulatedFunctionParamsVec
[
i
]
=
make_float4
((
float
)
min
,
(
float
)
max
,
(
float
)
((
values
.
size
()
-
1
)
/
(
max
-
min
)),
(
float
)
values
.
size
()
-
2
);
// else if (acceptorExclusionVector[acceptor].z == -1)
vector
<
float4
>
f
=
cu
.
getExpressionUtilities
().
computeFunctionCoefficients
(
values
,
min
,
max
);
// acceptorExclusionVector[acceptor].z = donor;
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float4
>
(
cu
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
// else if (acceptorExclusionVector[acceptor].w == -1)
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
// acceptorExclusionVector[acceptor].w = donor;
tableArgs
<<
", const float4* __restrict__ "
<<
arrayName
;
// else
}
// throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
if
(
force
.
getNumFunctions
()
>
0
)
{
// }
tabulatedFunctionParams
=
CudaArray
::
create
<
float4
>
(
cu
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
);
// donorExclusions = new CudaArray<mm_int4>(cu, numDonors, "customHbondDonorExclusions");
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
// acceptorExclusions = new CudaArray<mm_int4>(cu, numDonors, "customHbondAcceptorExclusions");
tableArgs
<<
", const float4* __restrict__ functionParams"
;
// donorExclusions->upload(donorExclusionVector);
}
// acceptorExclusions->upload(acceptorExclusionVector);
//
// Record information about parameters.
// // Record the tabulated functions.
//
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
// CudaExpressionUtilities::FunctionPlaceholder fp;
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
// map<string, Lepton::CustomFunction*> functions;
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
// vector<pair<string, string> > functionDefinitions;
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
// vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
// stringstream tableArgs;
}
// for (int i = 0; i < force.getNumFunctions(); i++) {
if
(
globals
!=
NULL
)
// string name;
globals
->
upload
(
globalParamValues
);
// vector<double> values;
map
<
string
,
string
>
variables
;
// double min, max;
for
(
int
i
=
0
;
i
<
force
.
getNumPerDonorParameters
();
i
++
)
{
// force.getFunctionParameters(i, name, values, min, max);
const
string
&
name
=
force
.
getPerDonorParameterName
(
i
);
// string arrayName = "table"+cu.intToString(i);
variables
[
name
]
=
"donorParams"
+
donorParams
->
getParameterSuffix
(
i
);
// functionDefinitions.push_back(make_pair(name, arrayName));
}
// functions[name] = &fp;
for
(
int
i
=
0
;
i
<
force
.
getNumPerAcceptorParameters
();
i
++
)
{
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
const
string
&
name
=
force
.
getPerAcceptorParameterName
(
i
);
// vector<mm_float4> f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
variables
[
name
]
=
"acceptorParams"
+
acceptorParams
->
getParameterSuffix
(
i
);
// tabulatedFunctions.push_back(new CudaArray<mm_float4>(cu, values.size()-1, "TabulatedFunction"));
}
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
// tableArgs << ", __global const float4* restrict " << arrayName;
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
// }
variables
[
name
]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
// if (force.getNumFunctions() > 0) {
}
// tabulatedFunctionParams = new CudaArray<mm_float4>(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// tableArgs << ", __global const float4* restrict functionParams";
// and dihedrals the expression depends on.
// }
//
map
<
string
,
vector
<
int
>
>
distances
;
// // Record information about parameters.
map
<
string
,
vector
<
int
>
>
angles
;
//
map
<
string
,
vector
<
int
>
>
dihedrals
;
// globalParamNames.resize(force.getNumGlobalParameters());
Lepton
::
ParsedExpression
energyExpression
=
CustomHbondForceImpl
::
prepareExpression
(
force
,
functions
,
distances
,
angles
,
dihedrals
);
// globalParamValues.resize(force.getNumGlobalParameters());
map
<
string
,
Lepton
::
ParsedExpression
>
forceExpressions
;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
set
<
string
>
computedDeltas
;
// globalParamNames[i] = force.getGlobalParameterName(i);
computedDeltas
.
insert
(
"D1A1"
);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
string
atomNames
[]
=
{
"A1"
,
"A2"
,
"A3"
,
"D1"
,
"D2"
,
"D3"
};
// }
string
atomNamesLower
[]
=
{
"a1"
,
"a2"
,
"a3"
,
"d1"
,
"d2"
,
"d3"
};
// if (globals != NULL)
stringstream
computeDonor
,
computeAcceptor
,
extraArgs
;
// globals->upload(globalParamValues);
int
index
=
0
;
// map<string, string> variables;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
// for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
const
vector
<
int
>&
atoms
=
iter
->
second
;
// const string& name = force.getPerDonorParameterName(i);
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
// variables[name] = "donorParams"+donorParams->getParameterSuffix(i);
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
// }
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
// for (int i = 0; i < force.getNumPerAcceptorParameters(); i++) {
computedDeltas
.
insert
(
deltaName
);
// const string& name = force.getPerAcceptorParameterName(i);
}
// variables[name] = "acceptorParams"+acceptorParams->getParameterSuffix(i);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real r_"
+
deltaName
+
" = SQRT(delta"
+
deltaName
+
".w);
\n
"
);
// }
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
forceExpressions
[
"real dEdDistance"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
// const string& name = force.getGlobalParameterName(i);
}
// variables[name] = "globals["+cu.intToString(i)+"]";
index
=
0
;
// }
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
//
const
vector
<
int
>&
atoms
=
iter
->
second
;
// // Now to generate the kernel. First, it needs to calculate all distances, angles,
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
// // and dihedrals the expression depends on.
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
//
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
// map<string, vector<int> > distances;
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
// map<string, vector<int> > angles;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
0
]]
+
");
\n
"
);
// map<string, vector<int> > dihedrals;
computedDeltas
.
insert
(
deltaName1
);
// Lepton::ParsedExpression energyExpression = CustomHbondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
}
// map<string, Lepton::ParsedExpression> forceExpressions;
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
// set<string> computedDeltas;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
1
]]
+
", "
+
atomNamesLower
[
atoms
[
2
]]
+
");
\n
"
);
// computedDeltas.insert("D1A1");
computedDeltas
.
insert
(
deltaName2
);
// string atomNames[] = {"A1", "A2", "A3", "D1", "D2", "D3"};
}
// string atomNamesLower[] = {"a1", "a2", "a3", "d1", "d2", "d3"};
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real "
+
angleName
+
" = computeAngle(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
// stringstream computeDonor, computeAcceptor, extraArgs;
variables
[
iter
->
first
]
=
angleName
;
// int index = 0;
forceExpressions
[
"real dEdAngle"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
}
// const vector<int>& atoms = iter->second;
index
=
0
;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
// if (computedDeltas.count(deltaName) == 0) {
const
vector
<
int
>&
atoms
=
iter
->
second
;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
string
deltaName1
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
// computedDeltas.insert(deltaName);
string
deltaName2
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
1
]];
// }
string
deltaName3
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r_"+deltaName+" = sqrt(delta"+deltaName+".w);\n");
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
// variables[iter->first] = "r_"+deltaName;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
// forceExpressions["float dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
// }
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
// index = 0;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName1
+
" = delta("
+
atomNamesLower
[
atoms
[
0
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
computedDeltas
.
insert
(
deltaName1
);
// const vector<int>& atoms = iter->second;
}
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName2
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
1
]]
+
");
\n
"
);
// string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
computedDeltas
.
insert
(
deltaName2
);
// if (computedDeltas.count(deltaName1) == 0) {
}
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
// computedDeltas.insert(deltaName1);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 delta"
+
deltaName3
+
" = delta("
+
atomNamesLower
[
atoms
[
2
]]
+
", "
+
atomNamesLower
[
atoms
[
3
]]
+
");
\n
"
);
// }
computedDeltas
.
insert
(
deltaName3
);
// if (computedDeltas.count(deltaName2) == 0) {
}
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 "
+
crossName1
+
" = computeCross(delta"
+
deltaName1
+
", delta"
+
deltaName2
+
");
\n
"
);
// computedDeltas.insert(deltaName2);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 "
+
crossName2
+
" = computeCross(delta"
+
deltaName2
+
", delta"
+
deltaName3
+
");
\n
"
);
// }
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real "
+
dihedralName
+
" = computeAngle("
+
crossName1
+
", "
+
crossName2
+
");
\n
"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
dihedralName
+
" *= (delta"
+
deltaName1
+
".x*"
+
crossName2
+
".x + delta"
+
deltaName1
+
".y*"
+
crossName2
+
".y + delta"
+
deltaName1
+
".z*"
+
crossName2
+
".z < 0 ? -1 : 1);
\n
"
);
// variables[iter->first] = angleName;
variables
[
iter
->
first
]
=
dihedralName
;
// forceExpressions["float dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions
[
"real dEdDihedral"
+
cu
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
// }
}
// index = 0;
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
// Next it needs to load parameters from global memory.
// const vector<int>& atoms = iter->second;
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
if
(
force
.
getNumGlobalParameters
()
>
0
)
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
extraArgs
<<
", const float* __restrict__ globals"
;
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
const
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
extraArgs
<<
", const "
+
buffer
.
getType
()
+
"* __restrict__ donor"
+
buffer
.
getName
();
// string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
cu
.
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
// if (computedDeltas.count(deltaName1) == 0) {
}
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
// computedDeltas.insert(deltaName1);
const
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
// }
extraArgs
<<
", const "
+
buffer
.
getType
()
+
"* __restrict__ acceptor"
+
buffer
.
getName
();
// if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
cu
.
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
}
// computedDeltas.insert(deltaName2);
// }
// Now evaluate the expressions.
// if (computedDeltas.count(deltaName3) == 0) {
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
computeAcceptor
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
// computedDeltas.insert(deltaName3);
forceExpressions
[
"energy += "
]
=
energyExpression
;
// }
computeDonor
<<
cu
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionDefinitions
,
"temp"
,
"functionParams"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n");
// Finally, apply forces to atoms.
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, dihedralName+" *= (delta"+deltaName1+".x*"+crossName2+".x + delta"+deltaName1+".y*"+crossName2+".y + delta"+deltaName1+".z*"+crossName2+".z < 0 ? -1 : 1);\n");
index
=
0
;
// variables[iter->first] = dihedralName;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
// forceExpressions["float dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
const
vector
<
int
>&
atoms
=
iter
->
second
;
// }
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
//
string
value
=
"(dEdDistance"
+
cu
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
;
// // Next it needs to load parameters from global memory.
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"-"
+
value
);
//
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
value
);
// if (force.getNumGlobalParameters() > 0)
}
// extraArgs << ", __global const float* restrict globals";
index
=
0
;
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
const
vector
<
int
>&
atoms
=
iter
->
second
;
// extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
string
deltaName1
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
0
]];
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cu.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
// }
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 crossProd = cross(delta"
+
deltaName2
+
", delta"
+
deltaName1
+
");
\n
"
);
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real lengthCross = max(SQRT(dot(crossProd,crossProd)), 1e-6f);
\n
"
);
// extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross0 = -cross(trim(delta"
+
deltaName1
+
"), crossProd)*dEdAngle"
+
cu
.
intToString
(
index
)
+
"/(delta"
+
deltaName1
+
".w*lengthCross);
\n
"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cu.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross2 = cross(trim(delta"
+
deltaName2
+
"), crossProd)*dEdAngle"
+
cu
.
intToString
(
index
)
+
"/(delta"
+
deltaName2
+
".w*lengthCross);
\n
"
);
// }
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real3 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
);
//
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"deltaCross0"
);
// // Now evaluate the expressions.
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"deltaCross1"
);
//
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"deltaCross2"
);
// computeAcceptor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"}
\n
"
);
// forceExpressions["energy += "] = energyExpression;
}
// computeDonor << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
index
=
0
;
//
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
// // Finally, apply forces to atoms.
const
vector
<
int
>&
atoms
=
iter
->
second
;
//
string
deltaName1
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
// index = 0;
string
deltaName2
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
1
]];
// for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
string
deltaName3
=
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
// const vector<int>& atoms = iter->second;
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
// string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
// string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"{
\n
"
);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "-"+value);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real r = SQRT(delta"
+
deltaName2
+
".w);
\n
"
);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], value);
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 ff;
\n
"
);
// }
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.x = (-dEdDihedral"
+
cu
.
intToString
(
index
)
+
"*r)/"
+
crossName1
+
".w;
\n
"
);
// index = 0;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.y = (delta"
+
deltaName1
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName1
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName1
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
// for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.z = (delta"
+
deltaName3
+
".x*delta"
+
deltaName2
+
".x + delta"
+
deltaName3
+
".y*delta"
+
deltaName2
+
".y + delta"
+
deltaName3
+
".z*delta"
+
deltaName2
+
".z)/delta"
+
deltaName2
+
".w;
\n
"
);
// const vector<int>& atoms = iter->second;
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"ff.w = (dEdDihedral"
+
cu
.
intToString
(
index
)
+
"*r)/"
+
crossName2
+
".w;
\n
"
);
// string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 internalF0 = ff.x*"
+
crossName1
+
";
\n
"
);
// string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 internalF3 = ff.w*"
+
crossName2
+
";
\n
"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"real4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n");
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
0
],
"internalF0"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float lengthCross = max(length(crossProd), 1e-6f);\n");
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
1
],
"s-internalF0"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross0 = -cross(delta"+deltaName1+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n");
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
2
],
"-s-internalF3"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross2 = cross(delta"+deltaName2+", crossProd)*dEdAngle"+cu.intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n");
applyDonorAndAcceptorForces
(
computeDonor
,
computeAcceptor
,
atoms
[
3
],
"internalF3"
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross1 = -(deltaCross0+deltaCross2);\n");
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
"}
\n
"
);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "deltaCross0.xyz");
}
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "deltaCross1.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "deltaCross2.xyz");
// Generate the kernels.
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
// }
map
<
string
,
string
>
replacements
;
// index = 0;
replacements
[
"COMPUTE_DONOR_FORCE"
]
=
computeDonor
.
str
();
// for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
replacements
[
"COMPUTE_ACCEPTOR_FORCE"
]
=
computeAcceptor
.
str
();
// const vector<int>& atoms = iter->second;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
// string deltaName1 = atomNames[atoms[0]]+atomNames[atoms[1]];
map
<
string
,
string
>
defines
;
// string deltaName2 = atomNames[atoms[2]]+atomNames[atoms[1]];
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
// string deltaName3 = atomNames[atoms[2]]+atomNames[atoms[3]];
defines
[
"NUM_DONORS"
]
=
cu
.
intToString
(
numDonors
);
// string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
defines
[
"NUM_ACCEPTORS"
]
=
cu
.
intToString
(
numAcceptors
);
// string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
defines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
)
{
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r = sqrt(delta"+deltaName2+".w);\n");
defines
[
"USE_CUTOFF"
]
=
"1"
;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 ff;\n");
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
());
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+cu.intToString(index)+"*r)/"+crossName1+".w;\n");
}
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.y = (delta"+deltaName1+".x*delta"+deltaName2+".x + delta"+deltaName1+".y*delta"+deltaName2+".y + delta"+deltaName1+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
if
(
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomHbondForce
::
CutoffNonPeriodic
)
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.z = (delta"+deltaName3+".x*delta"+deltaName2+".x + delta"+deltaName3+".y*delta"+deltaName2+".y + delta"+deltaName3+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
defines
[
"USE_PERIODIC"
]
=
"1"
;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+cu.intToString(index)+"*r)/"+crossName2+".w;\n");
if
(
force
.
getNumExclusions
()
>
0
)
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF0 = ff.x*"+crossName1+";\n");
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF3 = ff.w*"+crossName2+";\n");
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
customHbondForce
,
replacements
),
defines
);
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 s = ff.y*internalF0 - ff.z*internalF3;\n");
donorKernel
=
cu
.
getKernel
(
module
,
"computeDonorForces"
);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "internalF0.xyz");
acceptorKernel
=
cu
.
getKernel
(
module
,
"computeAcceptorForces"
);
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "s.xyz-internalF0.xyz");
}
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "-s.xyz-internalF3.xyz");
// applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[3], "internalF3.xyz");
double
CudaCalcCustomHbondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
// addDonorAndAcceptorCode(computeDonor, computeAcceptor, "}\n");
if
(
numDonors
==
0
||
numAcceptors
==
0
)
// }
return
0.0
;
//
if
(
globals
!=
NULL
)
{
// // Generate the kernels.
bool
changed
=
false
;
//
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
// map<string, string> replacements;
float
value
=
(
float
)
context
.
getParameter
(
globalParamNames
[
i
]);
// replacements["COMPUTE_DONOR_FORCE"] = computeDonor.str();
if
(
value
!=
globalParamValues
[
i
])
// replacements["COMPUTE_ACCEPTOR_FORCE"] = computeAcceptor.str();
changed
=
true
;
// replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
globalParamValues
[
i
]
=
value
;
// map<string, string> defines;
}
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
if
(
changed
)
// defines["NUM_DONORS"] = cu.intToString(numDonors);
globals
->
upload
(
globalParamValues
);
// defines["NUM_ACCEPTORS"] = cu.intToString(numAcceptors);
}
// defines["M_PI"] = cu.doubleToString(M_PI);
if
(
!
hasInitializedKernel
)
{
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff) {
hasInitializedKernel
=
true
;
// defines["USE_CUTOFF"] = "1";
int
index
=
0
;
// defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
donorArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
// }
donorArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
// if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff && force.getNonbondedMethod() != CustomHbondForce::CutoffNonPeriodic)
donorArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
// defines["USE_PERIODIC"] = "1";
donorArgs
.
push_back
(
&
donorExclusions
->
getDevicePointer
());
// if (force.getNumExclusions() > 0)
donorArgs
.
push_back
(
&
donors
->
getDevicePointer
());
// defines["USE_EXCLUSIONS"] = "1";
donorArgs
.
push_back
(
&
acceptors
->
getDevicePointer
());
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customHbondForce, replacements), defines);
donorArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
// donorKernel = cu.getKernel(module, "computeDonorForces");
donorArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
// acceptorKernel = cu.getKernel(module, "computeAcceptorForces");
if
(
globals
!=
NULL
)
//}
donorArgs
.
push_back
(
&
globals
->
getDevicePointer
());
//
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
//double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
// if (numDonors == 0 || numAcceptors == 0)
donorArgs
.
push_back
(
&
buffer
.
getMemory
());
// return 0.0;
}
// if (globals != NULL) {
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
// bool changed = false;
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
donorArgs
.
push_back
(
&
buffer
.
getMemory
());
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
}
// if (value != globalParamValues[i])
if
(
tabulatedFunctionParams
!=
NULL
)
{
// changed = true;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
// globalParamValues[i] = value;
donorArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
// }
donorArgs
.
push_back
(
&
tabulatedFunctionParams
->
getDevicePointer
());
// if (changed)
}
// globals->upload(globalParamValues);
index
=
0
;
// }
acceptorArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
// if (!hasInitializedKernel) {
acceptorArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
// hasInitializedKernel = true;
acceptorArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
// int index = 0;
acceptorArgs
.
push_back
(
&
acceptorExclusions
->
getDevicePointer
());
// donorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer());
acceptorArgs
.
push_back
(
&
donors
->
getDevicePointer
());
// donorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
acceptorArgs
.
push_back
(
&
acceptors
->
getDevicePointer
());
// donorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
acceptorArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
// donorKernel.setArg<cu::Buffer>(index++, donorExclusions->getDevicePointer());
acceptorArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
// donorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer());
if
(
globals
!=
NULL
)
// donorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer());
acceptorArgs
.
push_back
(
&
globals
->
getDevicePointer
());
// donorKernel.setArg<cu::Buffer>(index++, donorBufferIndices->getDevicePointer());
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
// donorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL);
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
// index += 2; // Periodic box size arguments are set when the kernel is executed.
acceptorArgs
.
push_back
(
&
buffer
.
getMemory
());
// if (globals != NULL)
}
// donorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
acceptorArgs
.
push_back
(
&
buffer
.
getMemory
());
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
}
// }
if
(
tabulatedFunctionParams
!=
NULL
)
{
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
acceptorArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
// donorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
acceptorArgs
.
push_back
(
&
tabulatedFunctionParams
->
getDevicePointer
());
// }
}
// if (tabulatedFunctionParams != NULL) {
}
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
int
sharedMemorySize
=
3
*
CudaContext
::
ThreadBlockSize
*
sizeof
(
float4
);
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
cu
.
executeKernel
(
donorKernel
,
&
donorArgs
[
0
],
max
(
numDonors
,
numAcceptors
),
CudaContext
::
ThreadBlockSize
,
sharedMemorySize
);
// donorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
cu
.
executeKernel
(
acceptorKernel
,
&
acceptorArgs
[
0
],
max
(
numDonors
,
numAcceptors
),
CudaContext
::
ThreadBlockSize
,
sharedMemorySize
);
// }
return
0.0
;
// index = 0;
}
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getForceBuffers().getDevicePointer());
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
void
CudaCalcCustomHbondForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
)
{
// acceptorKernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
cu
.
setAsCurrent
();
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorExclusions->getDevicePointer());
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
// acceptorKernel.setArg<cu::Buffer>(index++, donors->getDevicePointer());
int
startIndex
=
cu
.
getContextIndex
()
*
force
.
getNumDonors
()
/
numContexts
;
// acceptorKernel.setArg<cu::Buffer>(index++, acceptors->getDevicePointer());
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
force
.
getNumDonors
()
/
numContexts
;
// acceptorKernel.setArg<cu::Buffer>(index++, acceptorBufferIndices->getDevicePointer());
if
(
numDonors
!=
endIndex
-
startIndex
)
// acceptorKernel.setArg(index++, 3*CudaContext::ThreadBlockSize*sizeof(mm_float4), NULL);
throw
OpenMMException
(
"updateParametersInContext: The number of donors has changed"
);
// index += 2; // Periodic box size arguments are set when the kernel is executed.
if
(
numAcceptors
!=
force
.
getNumAcceptors
())
// if (globals != NULL)
throw
OpenMMException
(
"updateParametersInContext: The number of acceptors has changed"
);
// acceptorKernel.setArg<cu::Buffer>(index++, globals->getDevicePointer());
// for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
// Record the per-donor parameters.
// const CudaNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
vector
<
vector
<
float
>
>
donorParamVector
(
numDonors
);
// }
vector
<
double
>
parameters
;
// for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
for
(
int
i
=
0
;
i
<
numDonors
;
i
++
)
{
// const CudaNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
int
d1
,
d2
,
d3
;
// acceptorKernel.setArg<cu::Memory>(index++, buffer.getMemory());
force
.
getDonorParameters
(
startIndex
+
i
,
d1
,
d2
,
d3
,
parameters
);
// }
donorParamVector
[
i
].
resize
(
parameters
.
size
());
// if (tabulatedFunctionParams != NULL) {
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
donorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctions[i]->getDevicePointer());
}
// acceptorKernel.setArg<cu::Buffer>(index++, tabulatedFunctionParams->getDevicePointer());
donorParams
->
setParameterValues
(
donorParamVector
);
// }
// }
// Record the per-acceptor parameters.
// donorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize());
// donorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize());
vector
<
vector
<
float
>
>
acceptorParamVector
(
numAcceptors
);
// cu.executeKernel(donorKernel, max(numDonors, numAcceptors));
for
(
int
i
=
0
;
i
<
numAcceptors
;
i
++
)
{
// acceptorKernel.setArg<mm_float4>(8, cu.getPeriodicBoxSize());
int
a1
,
a2
,
a3
;
// acceptorKernel.setArg<mm_float4>(9, cu.getInvPeriodicBoxSize());
force
.
getAcceptorParameters
(
i
,
a1
,
a2
,
a3
,
parameters
);
// cu.executeKernel(acceptorKernel, max(numDonors, numAcceptors));
acceptorParamVector
[
i
].
resize
(
parameters
.
size
());
// return 0.0;
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
//}
acceptorParamVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
//
}
//void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
acceptorParams
->
setParameterValues
(
acceptorParamVector
);
// cu.setAsCurrent();
// int numContexts = cu.getPlatformData().contexts.size();
// Mark that the current reordering may be invalid.
// int startIndex = cu.getContextIndex()*force.getNumDonors()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*force.getNumDonors()/numContexts;
cu
.
invalidateMolecules
();
// if (numDonors != endIndex-startIndex)
}
// throw OpenMMException("updateParametersInContext: The number of donors has changed");
// if (numAcceptors != force.getNumAcceptors())
// throw OpenMMException("updateParametersInContext: The number of acceptors has changed");
//
// // Record the per-donor parameters.
//
// vector<vector<cl_float> > donorParamVector(numDonors);
// vector<double> parameters;
// for (int i = 0; i < numDonors; i++) {
// int d1, d2, d3;
// force.getDonorParameters(startIndex+i, d1, d2, d3, parameters);
// donorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// donorParamVector[i][j] = (cl_float) parameters[j];
// }
// donorParams->setParameterValues(donorParamVector);
//
// // Record the per-acceptor parameters.
//
// vector<vector<cl_float> > acceptorParamVector(numAcceptors);
// for (int i = 0; i < numAcceptors; i++) {
// int a1, a2, a3;
// force.getAcceptorParameters(i, a1, a2, a3, parameters);
// acceptorParamVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// acceptorParamVector[i][j] = (cl_float) parameters[j];
// }
// acceptorParams->setParameterValues(acceptorParamVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
class
CudaCustomCompoundBondForceInfo
:
public
CudaForceInfo
{
class
CudaCustomCompoundBondForceInfo
:
public
CudaForceInfo
{
public:
public:
...
@@ -4288,679 +4255,784 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
...
@@ -4288,679 +4255,784 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
// Update the time and step count.
// Update the time and step count.
double
dt
,
time
;
double
dt
,
time
;
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSize
)
time
=
maxTime
;
// Avoid round-off error
}
else
{
float2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSizeFloat
)
time
=
maxTime
;
// Avoid round-off error
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
CudaIntegrateVariableLangevinStepKernel
::~
CudaIntegrateVariableLangevinStepKernel
()
{
cu
.
setAsCurrent
();
if
(
params
!=
NULL
)
delete
params
;
}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
selectSizeKernel
=
cu
.
getKernel
(
module
,
"selectLangevinStepSize"
);
params
=
CudaArray
::
create
<
float
>
(
cu
,
3
,
"langevinParams"
);
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
}
double
CudaIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
// Select the step size to use.
double
maxStepSize
=
maxTime
-
cu
.
getTime
();
float
maxStepSizeFloat
=
(
float
)
maxStepSize
;
double
tol
=
integrator
.
getErrorTolerance
();
float
tolFloat
=
(
float
)
tol
;
double
tau
=
integrator
.
getFriction
()
==
0.0
?
0.0
:
1.0
/
integrator
.
getFriction
();
float
tauFloat
=
(
float
)
tau
;
double
kT
=
BOLTZ
*
integrator
.
getTemperature
();
float
kTFloat
=
(
float
)
kT
;
void
*
argsSelect
[]
=
{
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tau
:
(
void
*
)
&
tauFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTFloat
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
params
->
getDevicePointer
()};
int
sharedSize
=
blockSize
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
cu
.
executeKernel
(
selectSizeKernel
,
argsSelect
,
blockSize
,
blockSize
,
sharedSize
);
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
// Call the second integration kernel.
void
*
args2
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
// Update the time and step count.
double
dt
,
time
;
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSize
)
time
=
maxTime
;
// Avoid round-off error
}
else
{
float2
stepSize
;
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
dt
=
stepSize
.
y
;
time
=
cu
.
getTime
()
+
dt
;
if
(
dt
==
maxStepSizeFloat
)
time
=
maxTime
;
// Avoid round-off error
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
class
CudaIntegrateCustomStepKernel
::
ReorderListener
:
public
CudaContext
::
ReorderListener
{
public:
ReorderListener
(
CudaContext
&
cu
,
CudaParameterSet
&
perDofValues
,
vector
<
vector
<
float
>
>&
localPerDofValuesFloat
,
vector
<
vector
<
double
>
>&
localPerDofValuesDouble
,
bool
&
deviceValuesAreCurrent
)
:
cu
(
cu
),
perDofValues
(
perDofValues
),
localPerDofValuesFloat
(
localPerDofValuesFloat
),
localPerDofValuesDouble
(
localPerDofValuesDouble
),
deviceValuesAreCurrent
(
deviceValuesAreCurrent
)
{
int
numAtoms
=
cu
.
getNumAtoms
();
lastAtomOrder
.
resize
(
numAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
lastAtomOrder
[
i
]
=
cu
.
getAtomIndex
()[
i
];
}
void
execute
()
{
// Reorder the per-DOF variables to reflect the new atom order.
if
(
perDofValues
.
getNumParameters
()
==
0
)
return
;
int
numAtoms
=
cu
.
getNumAtoms
();
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
deviceValuesAreCurrent
)
perDofValues
.
getParameterValues
(
localPerDofValuesDouble
);
vector
<
vector
<
double
>
>
swap
(
3
*
numAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
swap
[
3
*
lastAtomOrder
[
i
]]
=
localPerDofValuesDouble
[
3
*
i
];
swap
[
3
*
lastAtomOrder
[
i
]
+
1
]
=
localPerDofValuesDouble
[
3
*
i
+
1
];
swap
[
3
*
lastAtomOrder
[
i
]
+
2
]
=
localPerDofValuesDouble
[
3
*
i
+
2
];
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
localPerDofValuesDouble
[
3
*
i
]
=
swap
[
3
*
order
[
i
]];
localPerDofValuesDouble
[
3
*
i
+
1
]
=
swap
[
3
*
order
[
i
]
+
1
];
localPerDofValuesDouble
[
3
*
i
+
2
]
=
swap
[
3
*
order
[
i
]
+
2
];
}
perDofValues
.
setParameterValues
(
localPerDofValuesDouble
);
}
else
{
if
(
deviceValuesAreCurrent
)
perDofValues
.
getParameterValues
(
localPerDofValuesFloat
);
vector
<
vector
<
float
>
>
swap
(
3
*
numAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
swap
[
3
*
lastAtomOrder
[
i
]]
=
localPerDofValuesFloat
[
3
*
i
];
swap
[
3
*
lastAtomOrder
[
i
]
+
1
]
=
localPerDofValuesFloat
[
3
*
i
+
1
];
swap
[
3
*
lastAtomOrder
[
i
]
+
2
]
=
localPerDofValuesFloat
[
3
*
i
+
2
];
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
localPerDofValuesFloat
[
3
*
i
]
=
swap
[
3
*
order
[
i
]];
localPerDofValuesFloat
[
3
*
i
+
1
]
=
swap
[
3
*
order
[
i
]
+
1
];
localPerDofValuesFloat
[
3
*
i
+
2
]
=
swap
[
3
*
order
[
i
]
+
2
];
}
perDofValues
.
setParameterValues
(
localPerDofValuesFloat
);
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
lastAtomOrder
[
i
]
=
order
[
i
];
deviceValuesAreCurrent
=
true
;
}
private:
CudaContext
&
cu
;
CudaParameterSet
&
perDofValues
;
vector
<
vector
<
float
>
>&
localPerDofValuesFloat
;
vector
<
vector
<
double
>
>&
localPerDofValuesDouble
;
bool
&
deviceValuesAreCurrent
;
vector
<
int
>
lastAtomOrder
;
};
CudaIntegrateCustomStepKernel
::~
CudaIntegrateCustomStepKernel
()
{
cu
.
setAsCurrent
();
if
(
globalValues
!=
NULL
)
delete
globalValues
;
if
(
contextParameterValues
!=
NULL
)
delete
contextParameterValues
;
if
(
sumBuffer
!=
NULL
)
delete
sumBuffer
;
if
(
energy
!=
NULL
)
delete
energy
;
if
(
uniformRandoms
!=
NULL
)
delete
uniformRandoms
;
if
(
randomSeed
!=
NULL
)
delete
randomSeed
;
if
(
perDofValues
!=
NULL
)
delete
perDofValues
;
}
void
CudaIntegrateCustomStepKernel
::
initialize
(
const
System
&
system
,
const
CustomIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
sumBuffer
=
new
CudaArray
(
cu
,
3
*
system
.
getNumParticles
(),
elementSize
,
"sumBuffer"
);
energy
=
new
CudaArray
(
cu
,
1
,
elementSize
,
"energy"
);
perDofValues
=
new
CudaParameterSet
(
cu
,
integrator
.
getNumPerDofVariables
(),
3
*
system
.
getNumParticles
(),
"perDofVariables"
,
false
,
cu
.
getUseDoublePrecision
());
cu
.
addReorderListener
(
new
ReorderListener
(
cu
,
*
perDofValues
,
localPerDofValuesFloat
,
localPerDofValuesDouble
,
deviceValuesAreCurrent
));
prevStepSize
=
-
1.0
;
SimTKOpenMMUtilities
::
setRandomNumberSeed
(
integrator
.
getRandomNumberSeed
());
}
string
CudaIntegrateCustomStepKernel
::
createGlobalComputation
(
const
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
CustomIntegrator
&
integrator
,
const
string
&
energyName
)
{
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
if
(
variable
==
"dt"
)
expressions
[
"dt[0].y = "
]
=
expr
;
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
variable
==
integrator
.
getGlobalVariableName
(
i
))
expressions
[
"globals["
+
cu
.
intToString
(
i
)
+
"] = "
]
=
expr
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
variable
==
parameterNames
[
i
])
{
expressions
[
"params["
+
cu
.
intToString
(
i
)
+
"] = "
]
=
expr
;
modifiesParameters
=
true
;
}
}
if
(
expressions
.
size
()
==
0
)
throw
OpenMMException
(
"Unknown global variable: "
+
variable
);
map
<
string
,
string
>
variables
;
variables
[
"dt"
]
=
"dt[0].y"
;
variables
[
"uniform"
]
=
"uniform"
;
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
cu
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
return
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
,
""
);
}
string
CudaIntegrateCustomStepKernel
::
createPerDofComputation
(
const
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
string
&
forceName
,
const
string
&
energyName
)
{
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
};
string
suffix
=
suffixes
[
component
];
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
if
(
variable
==
"x"
)
expressions
[
"position"
+
suffix
+
" = "
]
=
expr
;
else
if
(
variable
==
"v"
)
expressions
[
"velocity"
+
suffix
+
" = "
]
=
expr
;
else
if
(
variable
==
""
)
expressions
[
"sum[3*index+"
+
cu
.
intToString
(
component
)
+
"] = "
]
=
expr
;
else
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
if
(
variable
==
integrator
.
getPerDofVariableName
(
i
))
expressions
[
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
)
+
" = "
]
=
expr
;
}
if
(
expressions
.
size
()
==
0
)
throw
OpenMMException
(
"Unknown per-DOF variable: "
+
variable
);
map
<
string
,
string
>
variables
;
variables
[
"x"
]
=
"position"
+
suffix
;
variables
[
"v"
]
=
"velocity"
+
suffix
;
variables
[
forceName
]
=
"f"
+
suffix
;
variables
[
"gaussian"
]
=
"gaussian"
+
suffix
;
variables
[
"uniform"
]
=
"uniform"
+
suffix
;
variables
[
"m"
]
=
"mass"
;
variables
[
"dt"
]
=
"stepSize"
;
variables
[
energyName
]
=
"energy[0]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"params["
+
cu
.
intToString
(
i
)
+
"]"
;
vector
<
pair
<
string
,
string
>
>
functions
;
return
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
"temp"
+
cu
.
intToString
(
component
)
+
"_"
,
""
,
"double"
);
}
void
CudaIntegrateCustomStepKernel
::
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numSteps
=
integrator
.
getNumComputations
();
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
// Initialize various data structures.
const
map
<
string
,
double
>&
params
=
context
.
getParameters
();
if
(
cu
.
getUseDoublePrecision
())
{
contextParameterValues
=
CudaArray
::
create
<
double
>
(
cu
,
max
(
1
,
(
int
)
params
.
size
()),
"contextParameters"
);
contextValuesDouble
.
resize
(
contextParameterValues
->
getSize
());
for
(
map
<
string
,
double
>::
const_iterator
iter
=
params
.
begin
();
iter
!=
params
.
end
();
++
iter
)
{
contextValuesDouble
[
parameterNames
.
size
()]
=
iter
->
second
;
parameterNames
.
push_back
(
iter
->
first
);
}
contextParameterValues
->
upload
(
contextValuesDouble
);
}
else
{
contextParameterValues
=
CudaArray
::
create
<
float
>
(
cu
,
max
(
1
,
(
int
)
params
.
size
()),
"contextParameters"
);
contextValuesFloat
.
resize
(
contextParameterValues
->
getSize
());
for
(
map
<
string
,
double
>::
const_iterator
iter
=
params
.
begin
();
iter
!=
params
.
end
();
++
iter
)
{
contextValuesFloat
[
parameterNames
.
size
()]
=
(
float
)
iter
->
second
;
parameterNames
.
push_back
(
iter
->
first
);
}
contextParameterValues
->
upload
(
contextValuesFloat
);
}
kernels
.
resize
(
integrator
.
getNumComputations
());
kernelArgs
.
resize
(
integrator
.
getNumComputations
());
requiredGaussian
.
resize
(
integrator
.
getNumComputations
(),
0
);
requiredUniform
.
resize
(
integrator
.
getNumComputations
(),
0
);
needsForces
.
resize
(
numSteps
,
false
);
needsEnergy
.
resize
(
numSteps
,
false
);
forceGroup
.
resize
(
numSteps
,
-
2
);
invalidatesForces
.
resize
(
numSteps
,
false
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
CudaContext
::
ThreadBlockSize
);
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Initialize the random number generator.
uniformRandoms
=
CudaArray
::
create
<
float4
>
(
cu
,
cu
.
getNumAtoms
(),
"uniformRandoms"
);
randomSeed
=
CudaArray
::
create
<
int4
>
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
"randomSeed"
);
vector
<
int4
>
seed
(
randomSeed
->
getSize
());
unsigned
int
r
=
integrator
.
getRandomNumberSeed
()
+
1
;
for
(
int
i
=
0
;
i
<
randomSeed
->
getSize
();
i
++
)
{
seed
[
i
].
x
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
y
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
z
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
seed
[
i
].
w
=
r
=
(
1664525
*
r
+
1013904223
)
&
0xFFFFFFFF
;
}
randomSeed
->
upload
(
seed
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
set
<
string
>
affectsForce
;
affectsForce
.
insert
(
"x"
);
for
(
vector
<
ForceImpl
*>::
const_iterator
iter
=
context
.
getForceImpls
().
begin
();
iter
!=
context
.
getForceImpls
().
end
();
++
iter
)
{
const
map
<
string
,
double
>
params
=
(
*
iter
)
->
getDefaultParameters
();
for
(
map
<
string
,
double
>::
const_iterator
param
=
params
.
begin
();
param
!=
params
.
end
();
++
param
)
affectsForce
.
insert
(
param
->
first
);
}
// Record information about all the computation steps.
stepType
.
resize
(
numSteps
);
vector
<
string
>
variable
(
numSteps
);
vector
<
Lepton
::
ParsedExpression
>
expression
(
numSteps
);
vector
<
string
>
forceGroupName
;
vector
<
string
>
energyGroupName
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
stringstream
fname
;
fname
<<
"f"
<<
i
;
forceGroupName
.
push_back
(
fname
.
str
());
stringstream
ename
;
ename
<<
"energy"
<<
i
;
energyGroupName
.
push_back
(
ename
.
str
());
}
vector
<
string
>
forceName
(
numSteps
,
"f"
);
vector
<
string
>
energyName
(
numSteps
,
"energy"
);
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
string
expr
;
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
if
(
expr
.
size
()
>
0
)
{
expression
[
step
]
=
Lepton
::
Parser
::
parse
(
expr
).
optimize
();
if
(
usesVariable
(
expression
[
step
],
"f"
))
{
needsForces
[
step
]
=
true
;
forceGroup
[
step
]
=
-
1
;
}
if
(
usesVariable
(
expression
[
step
],
"energy"
))
{
needsEnergy
[
step
]
=
true
;
forceGroup
[
step
]
=
-
1
;
}
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
if
(
usesVariable
(
expression
[
step
],
forceGroupName
[
i
]))
{
if
(
forceGroup
[
step
]
!=
-
2
)
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
needsForces
[
step
]
=
true
;
forceGroup
[
step
]
=
1
<<
i
;
forceName
[
step
]
=
forceGroupName
[
i
];
}
if
(
usesVariable
(
expression
[
step
],
energyGroupName
[
i
]))
{
if
(
forceGroup
[
step
]
!=
-
2
)
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
needsEnergy
[
step
]
=
true
;
forceGroup
[
step
]
=
1
<<
i
;
energyName
[
step
]
=
energyGroupName
[
i
];
}
}
}
invalidatesForces
[
step
]
=
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
||
affectsForce
.
find
(
variable
[
step
])
!=
affectsForce
.
end
());
if
(
forceGroup
[
step
]
==
-
2
&&
step
>
0
)
forceGroup
[
step
]
=
forceGroup
[
step
-
1
];
}
// Determine how each step will represent the position (as just a value, or a value plus a delta).
vector
<
bool
>
storePosAsDelta
(
numSteps
,
false
);
vector
<
bool
>
loadPosAsDelta
(
numSteps
,
false
);
bool
beforeConstrain
=
false
;
for
(
int
step
=
numSteps
-
1
;
step
>=
0
;
step
--
)
{
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
beforeConstrain
=
true
;
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
&&
variable
[
step
]
==
"x"
&&
beforeConstrain
)
storePosAsDelta
[
step
]
=
true
;
}
bool
storedAsDelta
=
false
;
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
loadPosAsDelta
[
step
]
=
storedAsDelta
;
if
(
storePosAsDelta
[
step
]
==
true
)
storedAsDelta
=
true
;
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
storedAsDelta
=
false
;
}
// Identify steps that can be merged into a single kernel.
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
)
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
&&
!
usesVariable
(
expression
[
step
],
"uniform"
))
merged
[
step
]
=
true
;
}
// Loop over all steps and create the kernels for them.
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
if
((
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
||
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
&&
!
merged
[
step
])
{
// Compute a per-DOF value.
stringstream
compute
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
buffer
.
getType
()
<<
" perDofx"
<<
cu
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofy"
<<
cu
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+1];
\n
"
;
compute
<<
buffer
.
getType
()
<<
" perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
" = perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2];
\n
"
;
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
compute
<<
"{
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
storePosAsDelta
[
j
])
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(posq[index]));
\n
"
;
else
compute
<<
"posq[index] = convertFromDouble4(position);
\n
"
;
}
else
if
(
variable
[
j
]
==
"v"
)
compute
<<
"velm[index] = convertFromDouble4(velocity);
\n
"
;
else
{
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index] = perDofx"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+1] = perDofy"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
compute
<<
"perDofValues"
<<
cu
.
intToString
(
i
+
1
)
<<
"[3*index+2] = perDofz"
<<
cu
.
intToString
(
i
+
1
)
<<
";
\n
"
;
}
}
compute
<<
"}
\n
"
;
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
perDofValues
->
getBuffers
()[
i
];
string
valueName
=
"perDofValues"
+
cu
.
intToString
(
i
+
1
);
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
if
(
loadPosAsDelta
[
step
])
defines
[
"LOAD_POS_AS_DELTA"
]
=
"1"
;
else
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
customIntegratorPerDof
,
replacements
),
defines
);
CUfunction
kernel
=
cu
.
getKernel
(
module
,
"computePerDof"
);
kernels
[
step
].
push_back
(
kernel
);
requiredGaussian
[
step
]
=
numGaussian
;
requiredUniform
[
step
]
=
numUniform
;
vector
<
void
*>
args1
;
args1
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getVelm
().
getDevicePointer
());
args1
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getStepSize
().
getDevicePointer
());
args1
.
push_back
(
&
globalValues
->
getDevicePointer
());
args1
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args1
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
args1
.
push_back
(
&
integration
.
getRandom
().
getDevicePointer
());
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
args1
.
push_back
(
&
energy
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kernelArgs
[
step
].
push_back
(
args1
);
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
{
// Create a second kernel for this step that sums the values.
vector
<
void
*>
args2
;
args2
.
push_back
(
&
sumBuffer
->
getDevicePointer
());
bool
found
=
false
;
for
(
int
j
=
0
;
j
<
integrator
.
getNumGlobalVariables
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
j
))
{
args2
.
push_back
(
&
globalValues
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
found
=
true
;
}
for
(
int
j
=
0
;
j
<
(
int
)
parameterNames
.
size
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
parameterNames
[
j
])
{
args2
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
found
=
true
;
modifiesParameters
=
true
;
}
if
(
!
found
)
throw
OpenMMException
(
"Unknown global variable: "
+
variable
[
step
]);
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
kernel
=
cu
.
getKernel
(
module
,
"computeSum"
);
kernels
[
step
].
push_back
(
kernel
);
kernelArgs
[
step
].
push_back
(
args2
);
}
}
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
merged
[
step
])
{
// Compute a global value.
stringstream
compute
;
for
(
int
i
=
step
;
i
<
numSteps
&&
(
i
==
step
||
merged
[
i
]);
i
++
)
compute
<<
"{
\n
"
<<
createGlobalComputation
(
variable
[
i
],
expression
[
i
],
integrator
,
energyName
[
i
])
<<
"}
\n
"
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorGlobal
,
replacements
),
defines
);
CUfunction
kernel
=
cu
.
getKernel
(
module
,
"computeGlobal"
);
kernels
[
step
].
push_back
(
kernel
);
vector
<
void
*>
args
;
args
.
push_back
(
&
integration
.
getStepSize
().
getDevicePointer
());
args
.
push_back
(
&
globalValues
->
getDevicePointer
());
args
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args
.
push_back
(
NULL
);
args
.
push_back
(
NULL
);
args
.
push_back
(
&
energy
->
getDevicePointer
());
kernelArgs
[
step
].
push_back
(
args
);
}
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
{
// Apply position constraints.
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
CUfunction
kernel
=
cu
.
getKernel
(
module
,
"applyPositionDeltas"
);
kernels
[
step
].
push_back
(
kernel
);
vector
<
void
*>
args
;
args
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
args
.
push_back
(
&
integration
.
getPosDelta
().
getDevicePointer
());
kernelArgs
[
step
].
push_back
(
args
);
}
}
// Create the kernel for summing energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
cu
.
getEnergyBuffer
().
getSize
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
sumEnergyKernel
=
cu
.
getKernel
(
module
,
"computeSum"
);
}
// Make sure all values (variables, parameters, etc.) stored on the device are up to date.
if
(
!
deviceValuesAreCurrent
)
{
if
(
cu
.
getUseDoublePrecision
())
perDofValues
->
setParameterValues
(
localPerDofValuesDouble
);
else
perDofValues
->
setParameterValues
(
localPerDofValuesFloat
);
deviceValuesAreCurrent
=
true
;
}
localValuesAreCurrent
=
false
;
double
stepSize
=
integrator
.
getStepSize
();
if
(
stepSize
!=
prevStepSize
)
{
if
(
cu
.
getUseDoublePrecision
())
{
double
size
[]
=
{
0
,
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
else
{
float
size
[]
=
{
0
,
(
float
)
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
prevStepSize
=
stepSize
;
}
bool
paramsChanged
=
false
;
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
dt
=
stepSize
.
y
;
if
(
value
!=
contextValuesDouble
[
i
])
{
time
=
cu
.
getTime
()
+
dt
;
contextValuesDouble
[
i
]
=
value
;
if
(
dt
==
maxStepSize
)
paramsChanged
=
true
;
time
=
maxTime
;
// Avoid round-off error
}
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesDouble
);
}
}
else
{
else
{
float2
stepSize
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
dt
=
stepSize
.
y
;
if
(
value
!=
contextValuesFloat
[
i
])
{
time
=
cu
.
getTime
()
+
dt
;
contextValuesFloat
[
i
]
=
value
;
if
(
dt
==
maxStepSizeFloat
)
paramsChanged
=
true
;
time
=
maxTime
;
// Avoid round-off error
}
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesFloat
);
}
// Loop over computation steps in the integrator and execute them.
void
*
randomArgs
[]
=
{
&
uniformRandoms
->
getDevicePointer
(),
&
randomSeed
->
getDevicePointer
()};
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
context
.
getLastForceGroups
()
!=
forceGroup
[
i
]))
{
// Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again.
bool
computeForce
=
false
,
computeEnergy
=
false
;
for
(
int
j
=
i
;
;
j
++
)
{
if
(
needsForces
[
j
])
computeForce
=
true
;
if
(
needsEnergy
[
j
])
computeEnergy
=
true
;
if
(
invalidatesForces
[
j
])
break
;
if
(
j
==
numSteps
-
1
)
j
=
-
1
;
if
(
j
==
i
-
1
)
break
;
}
recordChangedParameters
(
context
);
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
if
(
computeEnergy
)
{
void
*
args
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
energy
->
getDevicePointer
()};
cu
.
executeKernel
(
sumEnergyKernel
,
&
args
[
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
forcesAreValid
=
true
;
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
9
]
=
&
randomIndex
;
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
merged
[
i
])
{
float
uniform
=
SimTKOpenMMUtilities
::
getUniformlyDistributedRandomNumber
();
float
gauss
=
SimTKOpenMMUtilities
::
getNormallyDistributedRandomNumber
();
kernelArgs
[
i
][
0
][
3
]
=
&
uniform
;
kernelArgs
[
i
][
0
][
4
]
=
&
gauss
;
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
1
,
1
);
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
9
]
=
&
randomIndex
;
if
(
requiredUniform
[
i
]
>
0
)
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
1
],
&
kernelArgs
[
i
][
1
][
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
UpdateContextState
)
{
recordChangedParameters
(
context
);
context
.
updateContextState
();
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ConstrainPositions
)
{
cu
.
getIntegrationUtilities
().
applyConstraints
(
integrator
.
getConstraintTolerance
());
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
getIntegrationUtilities
().
computeVirtualSites
();
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ConstrainVelocities
)
{
cu
.
getIntegrationUtilities
().
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
}
if
(
invalidatesForces
[
i
])
forcesAreValid
=
false
;
}
}
cu
.
setTime
(
time
);
recordChangedParameters
(
context
);
// Update the time and step count.
cu
.
setTime
(
cu
.
getTime
()
+
stepSize
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
}
CudaIntegrateVariableLangevinStepKernel
::~
CudaIntegrateVariableLangevinStepKernel
()
{
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
cu
.
setAsCurrent
();
if
(
!
modifiesParameters
)
if
(
params
!=
NULL
)
return
;
delete
params
;
if
(
cu
.
getUseDoublePrecision
())
{
contextParameterValues
->
download
(
contextValuesDouble
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesDouble
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesDouble
[
i
]);
}
}
else
{
contextParameterValues
->
download
(
contextValuesFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesFloat
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesFloat
[
i
]);
}
}
}
}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
void
CudaIntegrateCustomStepKernel
::
getGlobalVariables
(
ContextImpl
&
context
,
vector
<
double
>&
values
)
const
{
cu
.
setAsCurrent
();
values
.
resize
(
numGlobalVariables
);
cu
.
getPlatformData
().
initializeContexts
(
system
);
if
(
numGlobalVariables
==
0
)
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
return
;
map
<
string
,
string
>
defines
;
if
(
cu
.
getUseDoublePrecision
())
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
globalValues
->
download
(
values
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
else
{
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
langevin
,
defines
,
""
);
vector
<
float
>
buffer
;
kernel1
=
cu
.
getKernel
(
module
,
"integrateLangevinPart1"
);
globalValues
->
download
(
buffer
);
kernel2
=
cu
.
getKernel
(
module
,
"integrateLangevinPart2"
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
selectSizeKernel
=
cu
.
getKernel
(
module
,
"selectLangevinStepSize"
);
values
[
i
]
=
buffer
[
i
];
params
=
CudaArray
::
create
<
float
>
(
cu
,
3
,
"langevinParams"
);
}
blockSize
=
min
(
256
,
system
.
getNumParticles
());
blockSize
=
max
(
blockSize
,
params
->
getSize
());
}
}
double
CudaIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
if
(
numGlobalVariables
==
0
)
int
numAtoms
=
cu
.
getNumAtoms
();
return
;
if
(
cu
.
getUseDoublePrecision
())
// Select the step size to use.
globalValues
->
upload
(
values
);
else
{
double
maxStepSize
=
maxTime
-
cu
.
getTime
();
vector
<
float
>
buffer
(
numGlobalVariables
);
float
maxStepSizeFloat
=
(
float
)
maxStepSize
;
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
double
tol
=
integrator
.
getErrorTolerance
();
buffer
[
i
]
=
(
float
)
values
[
i
];
float
tolFloat
=
(
float
)
tol
;
globalValues
->
upload
(
buffer
);
double
tau
=
integrator
.
getFriction
()
==
0.0
?
0.0
:
1.0
/
integrator
.
getFriction
();
}
float
tauFloat
=
(
float
)
tau
;
}
double
kT
=
BOLTZ
*
integrator
.
getTemperature
();
float
kTFloat
=
(
float
)
kT
;
void
*
argsSelect
[]
=
{
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
maxStepSize
:
(
void
*
)
&
maxStepSizeFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
tolFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
tau
:
(
void
*
)
&
tauFloat
,
cu
.
getUseDoublePrecision
()
?
(
void
*
)
&
kT
:
(
void
*
)
&
kTFloat
,
&
cu
.
getIntegrationUtilities
().
getStepSize
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
params
->
getDevicePointer
()};
int
sharedSize
=
blockSize
*
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
cu
.
executeKernel
(
selectSizeKernel
,
argsSelect
,
blockSize
,
blockSize
,
sharedSize
);
// Call the first integration kernel.
int
randomIndex
=
integration
.
prepareRandomNumbers
(
cu
.
getPaddedNumAtoms
());
void
*
args1
[]
=
{
&
cu
.
getVelm
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
params
->
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
(),
&
integration
.
getRandom
().
getDevicePointer
(),
&
randomIndex
};
cu
.
executeKernel
(
kernel1
,
args1
,
numAtoms
);
// Apply constraints.
integration
.
applyConstraints
(
integrator
.
getConstraintTolerance
());
// Call the second integration kernel.
void
*
args2
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
integration
.
getPosDelta
().
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
integration
.
getStepSize
().
getDevicePointer
()};
cu
.
executeKernel
(
kernel2
,
args2
,
numAtoms
);
integration
.
computeVirtualSites
();
// Update the time and step count.
double
dt
,
time
;
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
values
.
resize
(
perDofValues
->
getNumObjects
()
/
3
);
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
if
(
cu
.
getUseDoublePrecision
())
{
if
(
cu
.
getUseDoublePrecision
())
{
double2
stepSize
;
if
(
!
localValuesAreCurrent
)
{
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
dt
=
stepSize
.
y
;
localValuesAreCurrent
=
true
;
time
=
cu
.
getTime
()
+
dt
;
}
if
(
dt
==
maxStepSize
)
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
time
=
maxTime
;
// Avoid round-off error
for
(
int
j
=
0
;
j
<
3
;
j
++
)
values
[
order
[
i
]][
j
]
=
localPerDofValuesDouble
[
3
*
i
+
j
][
variable
];
}
}
else
{
else
{
float2
stepSize
;
if
(
!
localValuesAreCurrent
)
{
cu
.
getIntegrationUtilities
().
getStepSize
().
download
(
&
stepSize
);
perDofValues
->
getParameterValues
(
localPerDofValuesFloat
);
dt
=
stepSize
.
y
;
localValuesAreCurrent
=
true
;
time
=
cu
.
getTime
()
+
dt
;
}
if
(
dt
==
maxStepSizeFloat
)
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
time
=
maxTime
;
// Avoid round-off error
for
(
int
j
=
0
;
j
<
3
;
j
++
)
values
[
order
[
i
]][
j
]
=
localPerDofValuesFloat
[
3
*
i
+
j
][
variable
];
}
}
cu
.
setTime
(
time
);
cu
.
setStepCount
(
cu
.
getStepCount
()
+
1
);
return
dt
;
}
}
//class CudaIntegrateCustomStepKernel::ReorderListener : public CudaContext::ReorderListener {
void
CudaIntegrateCustomStepKernel
::
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
vector
<
Vec3
>&
values
)
{
//public:
const
vector
<
int
>&
order
=
cu
.
getAtomIndex
();
// ReorderListener(CudaContext& cu, CudaParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValues, bool& deviceValuesAreCurrent) :
if
(
cu
.
getUseDoublePrecision
())
{
// cu(cu), perDofValues(perDofValues), localPerDofValues(localPerDofValues), deviceValuesAreCurrent(deviceValuesAreCurrent) {
if
(
!
localValuesAreCurrent
)
{
// int numAtoms = cu.getNumAtoms();
perDofValues
->
getParameterValues
(
localPerDofValuesDouble
);
// lastAtomOrder.resize(numAtoms);
localValuesAreCurrent
=
true
;
// for (int i = 0; i < numAtoms; i++)
}
// lastAtomOrder[i] = cu.getAtomIndex()[i];
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
// }
for
(
int
j
=
0
;
j
<
3
;
j
++
)
// void execute() {
localPerDofValuesDouble
[
3
*
i
+
j
][
variable
]
=
values
[
order
[
i
]][
j
];
// // Reorder the per-DOF variables to reflect the new atom order.
}
//
else
{
// if (perDofValues.getNumParameters() == 0)
if
(
!
localValuesAreCurrent
)
{
// return;
perDofValues
->
getParameterValues
(
localPerDofValuesFloat
);
// int numAtoms = cu.getNumAtoms();
localValuesAreCurrent
=
true
;
// if (deviceValuesAreCurrent)
}
// perDofValues.getParameterValues(localPerDofValues);
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
// vector<vector<cl_float> > swap(3*numAtoms);
for
(
int
j
=
0
;
j
<
3
;
j
++
)
// for (int i = 0; i < numAtoms; i++) {
localPerDofValuesFloat
[
3
*
i
+
j
][
variable
]
=
(
float
)
values
[
order
[
i
]][
j
];
// swap[3*lastAtomOrder[i]] = localPerDofValues[3*i];
}
// swap[3*lastAtomOrder[i]+1] = localPerDofValues[3*i+1];
deviceValuesAreCurrent
=
false
;
// swap[3*lastAtomOrder[i]+2] = localPerDofValues[3*i+2];
}
// }
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < numAtoms; i++) {
// localPerDofValues[3*i] = swap[3*order[i]];
// localPerDofValues[3*i+1] = swap[3*order[i]+1];
// localPerDofValues[3*i+2] = swap[3*order[i]+2];
// }
// perDofValues.setParameterValues(localPerDofValues);
// for (int i = 0; i < numAtoms; i++)
// lastAtomOrder[i] = order[i];
// deviceValuesAreCurrent = true;
// }
//private:
// CudaContext& cu;
// CudaParameterSet& perDofValues;
// vector<vector<cl_float> >& localPerDofValues;
// bool& deviceValuesAreCurrent;
// vector<int> lastAtomOrder;
//};
//
//CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
// cu.setAsCurrent();
// if (globalValues != NULL)
// delete globalValues;
// if (contextParameterValues != NULL)
// delete contextParameterValues;
// if (sumBuffer != NULL)
// delete sumBuffer;
// if (energy != NULL)
// delete energy;
// if (uniformRandoms != NULL)
// delete uniformRandoms;
// if (randomSeed != NULL)
// delete randomSeed;
// if (perDofValues != NULL)
// delete perDofValues;
//}
//
//void CudaIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
// cu.setAsCurrent();
// cu.getPlatformData().initializeContexts(system);
// cu.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
// numGlobalVariables = integrator.getNumGlobalVariables();
// globalValues = new CudaArray<cl_float>(cu, max(1, numGlobalVariables), "globalVariables", true);
// sumBuffer = new CudaArray<cl_float>(cu, 3*system.getNumParticles(), "sumBuffer");
// energy = new CudaArray<cl_float>(cu, 1, "energy");
// perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables");
// cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValues, deviceValuesAreCurrent));
// prevStepSize = -1.0;
// SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
//}
//
//string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "dt")
// expressions["dt[0].y = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// if (variable == integrator.getGlobalVariableName(i))
// expressions["globals["+cu.intToString(i)+"] = "] = expr;
// for (int i = 0; i < (int) parameterNames.size(); i++)
// if (variable == parameterNames[i]) {
// expressions["params["+cu.intToString(i)+"] = "] = expr;
// modifiesParameters = true;
// }
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown global variable: "+variable);
// map<string, string> variables;
// variables["dt"] = "dt[0].y";
// variables["uniform"] = "uniform";
// variables["gaussian"] = "gaussian";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
//}
//
//string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
// const string suffixes[] = {".x", ".y", ".z"};
// string suffix = suffixes[component];
// map<string, Lepton::ParsedExpression> expressions;
// if (variable == "x")
// expressions["position"+suffix+" = "] = expr;
// else if (variable == "v")
// expressions["velocity"+suffix+" = "] = expr;
// else if (variable == "")
// expressions["sum[3*index+"+cu.intToString(component)+"] = "] = expr;
// else {
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// if (variable == integrator.getPerDofVariableName(i))
// expressions["perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i)+" = "] = expr;
// }
// if (expressions.size() == 0)
// throw OpenMMException("Unknown per-DOF variable: "+variable);
// map<string, string> variables;
// variables["x"] = "position"+suffix;
// variables["v"] = "velocity"+suffix;
// variables[forceName] = "f"+suffix;
// variables["gaussian"] = "gaussian"+suffix;
// variables["uniform"] = "uniform"+suffix;
// variables["m"] = "mass";
// variables["dt"] = "stepSize";
// variables[energyName] = "energy[0]";
// for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
// variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
// for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
// variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
// for (int i = 0; i < (int) parameterNames.size(); i++)
// variables[parameterNames[i]] = "params["+cu.intToString(i)+"]";
// vector<pair<string, string> > functions;
// string tempType = (cu.getSupportsDoublePrecision() ? "double" : "float");
// return cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp"+cu.intToString(component)+"_", "", tempType);
//}
//
//void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// int numAtoms = cu.getNumAtoms();
// int numSteps = integrator.getNumComputations();
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
//
// // Initialize various data structures.
//
// const map<string, double>& params = context.getParameters();
// contextParameterValues = new CudaArray<cl_float>(cu, max(1, (int) params.size()), "contextParameters", true);
// for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
// contextParameterValues->set(parameterNames.size(), (float) iter->second);
// parameterNames.push_back(iter->first);
// }
// contextParameterValues->upload();
// kernels.resize(integrator.getNumComputations());
// requiredGaussian.resize(integrator.getNumComputations(), 0);
// requiredUniform.resize(integrator.getNumComputations(), 0);
// needsForces.resize(numSteps, false);
// needsEnergy.resize(numSteps, false);
// forceGroup.resize(numSteps, -2);
// invalidatesForces.resize(numSteps, false);
// merged.resize(numSteps, false);
// modifiesParameters = false;
// map<string, string> defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// defines["WORK_GROUP_SIZE"] = cu.intToString(CudaContext::ThreadBlockSize);
//
// // Initialize the random number generator.
//
// uniformRandoms = new CudaArray<mm_float4>(cu, cu.getNumAtoms(), "uniformRandoms");
// randomSeed = new CudaArray<mm_int4>(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, "randomSeed");
// vector<mm_int4> seed(randomSeed->getSize());
// unsigned int r = integrator.getRandomNumberSeed()+1;
// for (int i = 0; i < randomSeed->getSize(); i++) {
// seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
// }
// randomSeed->upload(seed);
// CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
// randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// randomKernel.setArg<cu::Buffer>(0, uniformRandoms->getDevicePointer());
// randomKernel.setArg<cu::Buffer>(1, randomSeed->getDevicePointer());
//
// // Build a list of all variables that affect the forces, so we can tell which
// // steps invalidate them.
//
// set<string> affectsForce;
// affectsForce.insert("x");
// for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) {
// const map<string, double> params = (*iter)->getDefaultParameters();
// for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param)
// affectsForce.insert(param->first);
// }
//
// // Record information about all the computation steps.
//
// stepType.resize(numSteps);
// vector<string> variable(numSteps);
// vector<Lepton::ParsedExpression> expression(numSteps);
// vector<string> forceGroupName;
// vector<string> energyGroupName;
// for (int i = 0; i < 32; i++) {
// stringstream fname;
// fname << "f" << i;
// forceGroupName.push_back(fname.str());
// stringstream ename;
// ename << "energy" << i;
// energyGroupName.push_back(ename.str());
// }
// vector<string> forceName(numSteps, "f");
// vector<string> energyName(numSteps, "energy");
// for (int step = 0; step < numSteps; step++) {
// string expr;
// integrator.getComputationStep(step, stepType[step], variable[step], expr);
// if (expr.size() > 0) {
// expression[step] = Lepton::Parser::parse(expr).optimize();
// if (usesVariable(expression[step], "f")) {
// needsForces[step] = true;
// forceGroup[step] = -1;
// }
// if (usesVariable(expression[step], "energy")) {
// needsEnergy[step] = true;
// forceGroup[step] = -1;
// }
// for (int i = 0; i < 32; i++) {
// if (usesVariable(expression[step], forceGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsForces[step] = true;
// forceGroup[step] = 1<<i;
// forceName[step] = forceGroupName[i];
// }
// if (usesVariable(expression[step], energyGroupName[i])) {
// if (forceGroup[step] != -2)
// throw OpenMMException("A single computation step cannot depend on multiple force groups");
// needsEnergy[step] = true;
// forceGroup[step] = 1<<i;
// energyName[step] = energyGroupName[i];
// }
// }
// }
// invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
// if (forceGroup[step] == -2 && step > 0)
// forceGroup[step] = forceGroup[step-1];
// }
//
// // Determine how each step will represent the position (as just a value, or a value plus a delta).
//
// vector<bool> storePosAsDelta(numSteps, false);
// vector<bool> loadPosAsDelta(numSteps, false);
// bool beforeConstrain = false;
// for (int step = numSteps-1; step >= 0; step--) {
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// beforeConstrain = true;
// else if (stepType[step] == CustomIntegrator::ComputePerDof && variable[step] == "x" && beforeConstrain)
// storePosAsDelta[step] = true;
// }
// bool storedAsDelta = false;
// for (int step = 0; step < numSteps; step++) {
// loadPosAsDelta[step] = storedAsDelta;
// if (storePosAsDelta[step] == true)
// storedAsDelta = true;
// if (stepType[step] == CustomIntegrator::ConstrainPositions)
// storedAsDelta = false;
// }
//
// // Identify steps that can be merged into a single kernel.
//
// for (int step = 1; step < numSteps; step++) {
// if (needsForces[step] || needsEnergy[step])
// continue;
// if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal)
// merged[step] = true;
// if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof &&
// !usesVariable(expression[step], "uniform"))
// merged[step] = true;
// }
//
// // Loop over all steps and create the kernels for them.
//
// for (int step = 0; step < numSteps; step++) {
// if ((stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) && !merged[step]) {
// // Compute a per-DOF value.
//
// stringstream compute;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << buffer.getType()<<" perDofx"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index];\n";
// compute << buffer.getType()<<" perDofy"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+1];\n";
// compute << buffer.getType()<<" perDofz"<<cu.intToString(i+1)<<" = perDofValues"<<cu.intToString(i+1)<<"[3*index+2];\n";
// }
// string convert = (cu.getSupportsDoublePrecision() ? "convert_float4(" : "(");
// int numGaussian = 0, numUniform = 0;
// for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
// compute << "{\n";
// for (int i = 0; i < 3; i++)
// compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]);
// if (variable[j] == "x") {
// if (storePosAsDelta[j]) {
// if (cu.getSupportsDoublePrecision())
// compute << "posDelta[index] = convert_float4(position-convert_double4(posq[index]));\n";
// else
// compute << "posDelta[index] = position-posq[index];\n";
// }
// else
// compute << "posq[index] = " << convert << "position);\n";
// }
// else if (variable[j] == "v")
// compute << "velm[index] = " << convert << "velocity);\n";
// else {
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index] = perDofx"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+1] = perDofy"<<cu.intToString(i+1)<<";\n";
// compute << "perDofValues"<<cu.intToString(i+1)<<"[3*index+2] = perDofz"<<cu.intToString(i+1)<<";\n";
// }
// }
// compute << "}\n";
// numGaussian += numAtoms*usesVariable(expression[j], "gaussian");
// numUniform += numAtoms*usesVariable(expression[j], "uniform");
// }
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// stringstream args;
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
// string valueName = "perDofValues"+cu.intToString(i+1);
// args << ", __global " << buffer.getType() << "* restrict " << valueName;
// }
// replacements["PARAMETER_ARGUMENTS"] = args.str();
// if (loadPosAsDelta[step])
// defines["LOAD_POS_AS_DELTA"] = "1";
// else if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
// defines.erase("LOAD_POS_AS_DELTA");
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computePerDof");
// kernels[step].push_back(kernel);
// requiredGaussian[step] = numGaussian;
// requiredUniform[step] = numUniform;
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getVelm().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, cu.getForce().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getRandom().getDevicePointer());
// index++;
// kernel.setArg<cu::Buffer>(index++, uniformRandoms->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
// kernel.setArg<cu::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
// if (stepType[step] == CustomIntegrator::ComputeSum) {
// // Create a second kernel for this step that sums the values.
//
// module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// kernel = cu.getKernel(module, "computeSum");
// kernels[step].push_back(kernel);
// index = 0;
// kernel.setArg<cu::Buffer>(index++, sumBuffer->getDevicePointer());
// bool found = false;
// for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
// if (variable[step] == integrator.getGlobalVariableName(j)) {
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// }
// for (int j = 0; j < (int) parameterNames.size() && !found; j++)
// if (variable[step] == parameterNames[j]) {
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// kernel.setArg<cl_uint>(index++, j);
// found = true;
// modifiesParameters = true;
// }
// if (!found)
// throw OpenMMException("Unknown global variable: "+variable[step]);
// kernel.setArg<cl_int>(index++, 3*numAtoms);
// }
// }
// else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
// // Compute a global value.
//
// stringstream compute;
// for (int i = step; i < numSteps && (i == step || merged[i]); i++)
// compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
// map<string, string> replacements;
// replacements["COMPUTE_STEP"] = compute.str();
// CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorGlobal, replacements), defines);
// cu::Kernel kernel = cu.getKernel(module, "computeGlobal");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, integration.getStepSize().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, globalValues->getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, contextParameterValues->getDevicePointer());
// index += 2;
// kernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// }
// else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// // Apply position constraints.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// cu::Kernel kernel = cu.getKernel(module, "applyPositionDeltas");
// kernels[step].push_back(kernel);
// int index = 0;
// kernel.setArg<cu::Buffer>(index++, cu.getPosq().getDevicePointer());
// kernel.setArg<cu::Buffer>(index++, integration.getPosDelta().getDevicePointer());
// }
// }
//
// // Create the kernel for summing energy.
//
// CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
// sumEnergyKernel = cu.getKernel(module, "computeSum");
// int index = 0;
// sumEnergyKernel.setArg<cu::Buffer>(index++, cu.getEnergyBuffer().getDevicePointer());
// sumEnergyKernel.setArg<cu::Buffer>(index++, energy->getDevicePointer());
// sumEnergyKernel.setArg<cl_int>(index++, 0);
// sumEnergyKernel.setArg<cl_int>(index++, cu.getEnergyBuffer().getSize());
// }
//
// // Make sure all values (variables, parameters, etc.) stored on the device are up to date.
//
// if (!deviceValuesAreCurrent) {
// perDofValues->setParameterValues(localPerDofValues);
// deviceValuesAreCurrent = true;
// }
// localValuesAreCurrent = false;
// double stepSize = integrator.getStepSize();
// if (stepSize != prevStepSize) {
// integration.getStepSize()[0].y = (cl_float) stepSize;
// integration.getStepSize().upload();
// prevStepSize = stepSize;
// }
// bool paramsChanged = false;
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i)) {
// contextParameterValues->set(i, value);
// paramsChanged = true;
// }
// }
// if (paramsChanged)
// contextParameterValues->upload();
//
// // Loop over computation steps in the integrator and execute them.
//
// for (int i = 0; i < numSteps; i++) {
// if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || context.getLastForceGroups() != forceGroup[i])) {
// // Recompute forces and/or energy. Figure out what is actually needed
// // between now and the next time they get invalidated again.
//
// bool computeForce = false, computeEnergy = false;
// for (int j = i; ; j++) {
// if (needsForces[j])
// computeForce = true;
// if (needsEnergy[j])
// computeEnergy = true;
// if (invalidatesForces[j])
// break;
// if (j == numSteps-1)
// j = -1;
// if (j == i-1)
// break;
// }
// recordChangedParameters(context);
// context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
// if (computeEnergy)
// cu.executeKernel(sumEnergyKernel, CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// forcesAreValid = true;
// }
// if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// }
// else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
// kernels[i][0].setArg<cl_float>(3, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
// kernels[i][0].setArg<cl_float>(4, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
// cu.executeKernel(kernels[i][0], 1, 1);
// }
// else if (stepType[i] == CustomIntegrator::ComputeSum) {
// kernels[i][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[i]));
// if (requiredUniform[i] > 0)
// cu.executeKernel(randomKernel, numAtoms);
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.executeKernel(kernels[i][1], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
// }
// else if (stepType[i] == CustomIntegrator::UpdateContextState) {
// recordChangedParameters(context);
// context.updateContextState();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainPositions) {
// cu.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
// cu.executeKernel(kernels[i][0], numAtoms);
// cu.getIntegrationUtilities().computeVirtualSites();
// }
// else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
// cu.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
// }
// if (invalidatesForces[i])
// forcesAreValid = false;
// }
// recordChangedParameters(context);
//
// // Update the time and step count.
//
// cu.setTime(cu.getTime()+stepSize);
// cu.setStepCount(cu.getStepCount()+1);
//}
//
//void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
// if (!modifiesParameters)
// return;
// contextParameterValues->download();
// for (int i = 0; i < (int) parameterNames.size(); i++) {
// float value = (float) context.getParameter(parameterNames[i]);
// if (value != contextParameterValues->get(i))
// context.setParameter(parameterNames[i], contextParameterValues->get(i));
// }
//}
//
//void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
// globalValues->download();
// values.resize(numGlobalVariables);
// for (int i = 0; i < numGlobalVariables; i++)
// values[i] = globalValues->get(i);
//}
//
//void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
// for (int i = 0; i < numGlobalVariables; i++)
// globalValues->set(i, (float) values[i]);
// globalValues->upload();
//}
//
//void CudaIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// values.resize(perDofValues->getNumObjects()/3);
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// values[order[i]][j] = localPerDofValues[3*i+j][variable];
//}
//
//void CudaIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
// if (!localValuesAreCurrent) {
// perDofValues->getParameterValues(localPerDofValues);
// localValuesAreCurrent = true;
// }
// CudaArray<cl_int>& order = cu.getAtomIndex();
// for (int i = 0; i < (int) values.size(); i++)
// for (int j = 0; j < 3; j++)
// localPerDofValues[3*i+j][variable] = (float) values[order[i]][j];
// deviceValuesAreCurrent = false;
//}
CudaApplyAndersenThermostatKernel
::~
CudaApplyAndersenThermostatKernel
()
{
CudaApplyAndersenThermostatKernel
::~
CudaApplyAndersenThermostatKernel
()
{
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
...
...
platforms/cuda2/src/CudaKernels.h
View file @
bd22eada
...
@@ -623,50 +623,49 @@ private:
...
@@ -623,50 +623,49 @@ private:
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
///**
/**
// * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
* This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
// */
*/
//class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
//public:
public:
// CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomNonbondedForceKernel(name, platform),
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
// hasInitializedKernel(false), cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), system(system) {
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
system
(
system
)
{
// }
}
// ~CudaCalcCustomNonbondedForceKernel();
~
CudaCalcCustomNonbondedForceKernel
();
// /**
/**
// * Initialize the kernel.
* Initialize the kernel.
// *
*
// * @param system the System this kernel will be applied to
* @param system the System this kernel will be applied to
// * @param force the CustomNonbondedForce this kernel will be used for
* @param force the CustomNonbondedForce this kernel will be used for
// */
*/
// void initialize(const System& system, const CustomNonbondedForce& force);
void
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
);
// /**
/**
// * Execute the kernel to calculate the forces and/or energy.
* Execute the kernel to calculate the forces and/or energy.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param includeForces true if forces should be calculated
* @param includeForces true if forces should be calculated
// * @param includeEnergy true if the energy should be calculated
* @param includeEnergy true if the energy should be calculated
// * @return the potential energy due to the force
* @return the potential energy due to the force
// */
*/
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
// /**
/**
// * Copy changed parameters over to a context.
* Copy changed parameters over to a context.
// *
*
// * @param context the context to copy parameters to
* @param context the context to copy parameters to
// * @param force the CustomNonbondedForce to copy the parameters from
* @param force the CustomNonbondedForce to copy the parameters from
// */
*/
// void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
//private:
private:
// bool hasInitializedKernel;
CudaContext
&
cu
;
// CudaContext& cu;
CudaParameterSet
*
params
;
// CudaParameterSet* params;
CudaArray
*
globals
;
// CudaArray<cl_float>* globals;
CudaArray
*
tabulatedFunctionParams
;
// CudaArray<mm_float4>* tabulatedFunctionParams;
std
::
vector
<
std
::
string
>
globalParamNames
;
// std::vector<std::string> globalParamNames;
std
::
vector
<
float
>
globalParamValues
;
// std::vector<cl_float> globalParamValues;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
System
&
system
;
// System& system;
};
//};
//
///**
///**
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// */
// */
...
@@ -814,60 +813,58 @@ private:
...
@@ -814,60 +813,58 @@ private:
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
};
};
///**
/**
// * This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
* This kernel is invoked by CustomHbondForce to calculate the forces acting on the system.
// */
*/
//class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
class
CudaCalcCustomHbondForceKernel
:
public
CalcCustomHbondForceKernel
{
//public:
public:
// CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CalcCustomHbondForceKernel(name, platform),
CudaCalcCustomHbondForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcCustomHbondForceKernel
(
name
,
platform
),
// hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
hasInitializedKernel
(
false
),
cu
(
cu
),
donorParams
(
NULL
),
acceptorParams
(
NULL
),
donors
(
NULL
),
acceptors
(
NULL
),
// donorBufferIndices(NULL), acceptorBufferIndices(NULL), globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL),
globals
(
NULL
),
donorExclusions
(
NULL
),
acceptorExclusions
(
NULL
),
tabulatedFunctionParams
(
NULL
),
system
(
system
)
{
// tabulatedFunctionParams(NULL), system(system) {
}
// }
~
CudaCalcCustomHbondForceKernel
();
// ~CudaCalcCustomHbondForceKernel();
/**
// /**
* Initialize the kernel.
// * Initialize the kernel.
*
// *
* @param system the System this kernel will be applied to
// * @param system the System this kernel will be applied to
* @param force the CustomHbondForce this kernel will be used for
// * @param force the CustomHbondForce this kernel will be used for
*/
// */
void
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
);
// void initialize(const System& system, const CustomHbondForce& force);
/**
// /**
* Execute the kernel to calculate the forces and/or energy.
// * Execute the kernel to calculate the forces and/or energy.
*
// *
* @param context the context in which to execute this kernel
// * @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
// * @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
// * @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
// * @return the potential energy due to the force
*/
// */
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
// double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
// /**
* Copy changed parameters over to a context.
// * Copy changed parameters over to a context.
*
// *
* @param context the context to copy parameters to
// * @param context the context to copy parameters to
* @param force the CustomHbondForce to copy the parameters from
// * @param force the CustomHbondForce to copy the parameters from
*/
// */
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomHbondForce
&
force
);
// void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
//private:
int
numDonors
,
numAcceptors
;
// int numDonors, numAcceptors;
bool
hasInitializedKernel
;
// bool hasInitializedKernel;
CudaContext
&
cu
;
// CudaContext& cu;
CudaParameterSet
*
donorParams
;
// CudaParameterSet* donorParams;
CudaParameterSet
*
acceptorParams
;
// CudaParameterSet* acceptorParams;
CudaArray
*
globals
;
// CudaArray<cl_float>* globals;
CudaArray
*
donors
;
// CudaArray<mm_int4>* donors;
CudaArray
*
acceptors
;
// CudaArray<mm_int4>* acceptors;
CudaArray
*
donorExclusions
;
// CudaArray<mm_int4>* donorBufferIndices;
CudaArray
*
acceptorExclusions
;
// CudaArray<mm_int4>* acceptorBufferIndices;
CudaArray
*
tabulatedFunctionParams
;
// CudaArray<mm_int4>* donorExclusions;
std
::
vector
<
std
::
string
>
globalParamNames
;
// CudaArray<mm_int4>* acceptorExclusions;
std
::
vector
<
float
>
globalParamValues
;
// CudaArray<mm_float4>* tabulatedFunctionParams;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
// std::vector<std::string> globalParamNames;
std
::
vector
<
void
*>
donorArgs
,
acceptorArgs
;
// std::vector<cl_float> globalParamValues;
System
&
system
;
// std::vector<CudaArray<mm_float4>*> tabulatedFunctions;
CUfunction
donorKernel
,
acceptorKernel
;
// System& system;
};
// CUfunction donorKernel, acceptorKernel;
//};
/**
/**
* This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
* This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
...
@@ -1062,94 +1059,98 @@ private:
...
@@ -1062,94 +1059,98 @@ private:
double
prevTemp
,
prevFriction
,
prevErrorTol
;
double
prevTemp
,
prevFriction
,
prevErrorTol
;
};
};
///**
/**
// * This kernel is invoked by CustomIntegrator to take one time step.
* This kernel is invoked by CustomIntegrator to take one time step.
// */
*/
//class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
class
CudaIntegrateCustomStepKernel
:
public
IntegrateCustomStepKernel
{
//public:
public:
// CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
CudaIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cu
(
cu
),
// hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), energy(NULL),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
contextParameterValues
(
NULL
),
sumBuffer
(
NULL
),
energy
(
NULL
),
// uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) {
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDofValues
(
NULL
)
{
// }
}
// ~CudaIntegrateCustomStepKernel();
~
CudaIntegrateCustomStepKernel
();
// /**
/**
// * Initialize the kernel.
* Initialize the kernel.
// *
*
// * @param system the System this kernel will be applied to
* @param system the System this kernel will be applied to
// * @param integrator the CustomIntegrator this kernel will be used for
* @param integrator the CustomIntegrator this kernel will be used for
// */
*/
// void initialize(const System& system, const CustomIntegrator& integrator);
void
initialize
(
const
System
&
system
,
const
CustomIntegrator
&
integrator
);
// /**
/**
// * Execute the kernel.
* Execute the kernel.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param integrator the CustomIntegrator this kernel is being used for
* @param integrator the CustomIntegrator this kernel is being used for
// * @param forcesAreValid if the context has been modified since the last time step, this will be
* @param forcesAreValid if the context has been modified since the last time step, this will be
// * false to show that cached forces are invalid and must be recalculated.
* false to show that cached forces are invalid and must be recalculated.
// * On exit, this should specify whether the cached forces are valid at the
* On exit, this should specify whether the cached forces are valid at the
// * end of the step.
* end of the step.
// */
*/
// void execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
// /**
/**
// * Get the values of all global variables.
* Get the values of all global variables.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param values on exit, this contains the values
* @param values on exit, this contains the values
// */
*/
// void getGlobalVariables(ContextImpl& context, std::vector<double>& values) const;
void
getGlobalVariables
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
values
)
const
;
// /**
/**
// * Set the values of all global variables.
* Set the values of all global variables.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param values a vector containing the values
* @param values a vector containing the values
// */
*/
// void setGlobalVariables(ContextImpl& context, const std::vector<double>& values);
void
setGlobalVariables
(
ContextImpl
&
context
,
const
std
::
vector
<
double
>&
values
);
// /**
/**
// * Get the values of a per-DOF variable.
* Get the values of a per-DOF variable.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param variable the index of the variable to get
* @param variable the index of the variable to get
// * @param values on exit, this contains the values
* @param values on exit, this contains the values
// */
*/
// void getPerDofVariable(ContextImpl& context, int variable, std::vector<Vec3>& values) const;
void
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
std
::
vector
<
Vec3
>&
values
)
const
;
// /**
/**
// * Set the values of a per-DOF variable.
* Set the values of a per-DOF variable.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @param variable the index of the variable to get
* @param variable the index of the variable to get
// * @param values a vector containing the values
* @param values a vector containing the values
// */
*/
// void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
void
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
std
::
vector
<
Vec3
>&
values
);
//private:
private:
// class ReorderListener;
class
ReorderListener
;
// std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName);
std
::
string
createGlobalComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
energyName
);
// std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
// void recordChangedParameters(ContextImpl& context);
void
recordChangedParameters
(
ContextImpl
&
context
);
// CudaContext& cu;
CudaContext
&
cu
;
// double prevStepSize;
double
prevStepSize
;
// int numGlobalVariables;
int
numGlobalVariables
;
// bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
;
// mutable bool localValuesAreCurrent;
mutable
bool
localValuesAreCurrent
;
// CudaArray<cl_float>* globalValues;
CudaArray
*
globalValues
;
// CudaArray<cl_float>* contextParameterValues;
CudaArray
*
contextParameterValues
;
// CudaArray<cl_float>* sumBuffer;
CudaArray
*
sumBuffer
;
// CudaArray<cl_float>* energy;
CudaArray
*
energy
;
// CudaArray<mm_float4>* uniformRandoms;
CudaArray
*
uniformRandoms
;
// CudaArray<mm_int4>* randomSeed;
CudaArray
*
randomSeed
;
// CudaParameterSet* perDofValues;
CudaParameterSet
*
perDofValues
;
// mutable std::vector<std::vector<cl_float> > localPerDofValues;
mutable
std
::
vector
<
std
::
vector
<
float
>
>
localPerDofValuesFloat
;
// std::vector<std::vector<CUfunction> > kernels;
mutable
std
::
vector
<
std
::
vector
<
double
>
>
localPerDofValuesDouble
;
// CUfunction sumEnergyKernel, randomKernel;
std
::
vector
<
float
>
contextValuesFloat
;
// std::vector<CustomIntegrator::ComputationType> stepType;
std
::
vector
<
double
>
contextValuesDouble
;
// std::vector<bool> needsForces;
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
// std::vector<bool> needsEnergy;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
// std::vector<bool> invalidatesForces;
CUfunction
sumEnergyKernel
,
randomKernel
;
// std::vector<bool> merged;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
// std::vector<int> forceGroup;
std
::
vector
<
bool
>
needsForces
;
// std::vector<int> requiredGaussian;
std
::
vector
<
bool
>
needsEnergy
;
// std::vector<int> requiredUniform;
std
::
vector
<
bool
>
invalidatesForces
;
// std::vector<std::string> parameterNames;
std
::
vector
<
bool
>
merged
;
//};
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
int
>
requiredGaussian
;
std
::
vector
<
int
>
requiredUniform
;
std
::
vector
<
std
::
string
>
parameterNames
;
};
/**
/**
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
* This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
...
...
platforms/cuda2/src/CudaParameterSet.cpp
View file @
bd22eada
...
@@ -39,11 +39,12 @@ using namespace std;
...
@@ -39,11 +39,12 @@ using namespace std;
throw OpenMMException(m.str());\
throw OpenMMException(m.str());\
}
}
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
)
:
CudaParameterSet
::
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
string
&
name
,
bool
bufferPerParameter
,
bool
useDoublePrecision
)
:
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
context
(
context
),
numParameters
(
numParameters
),
numObjects
(
numObjects
),
name
(
name
)
{
int
params
=
numParameters
;
int
params
=
numParameters
;
int
bufferCount
=
0
;
int
bufferCount
=
0
;
int
elementSize
=
4
;
elementSize
=
(
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
));
string
elementType
=
(
useDoublePrecision
?
"double"
:
"float"
);
CUdeviceptr
pointer
;
CUdeviceptr
pointer
;
string
errorMessage
=
"Error creating parameter set "
+
name
;
string
errorMessage
=
"Error creating parameter set "
+
name
;
if
(
!
bufferPerParameter
)
{
if
(
!
bufferPerParameter
)
{
...
@@ -51,14 +52,14 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
...
@@ -51,14 +52,14 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
4
));
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
4
));
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
4
,
elementSize
*
4
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
4
,
elementSize
*
4
,
pointer
));
params
-=
4
;
params
-=
4
;
}
}
if
(
params
>
1
)
{
if
(
params
>
1
)
{
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
2
));
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
*
2
));
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
2
,
elementSize
*
2
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
2
,
elementSize
*
2
,
pointer
));
params
-=
2
;
params
-=
2
;
}
}
}
}
...
@@ -66,50 +67,55 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
...
@@ -66,50 +67,55 @@ CudaParameterSet::CudaParameterSet(CudaContext& context, int numParameters, int
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
));
CHECK_RESULT
(
cuMemAlloc
(
&
pointer
,
numObjects
*
elementSize
));
std
::
stringstream
name
;
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
1
,
elementSize
,
pointer
));
buffers
.
push_back
(
CudaNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
elementType
,
1
,
elementSize
,
pointer
));
params
--
;
params
--
;
}
}
}
}
CudaParameterSet
::~
CudaParameterSet
()
{
CudaParameterSet
::~
CudaParameterSet
()
{
string
errorMessage
=
"Error freeing device memory"
;
if
(
context
.
getContextIsValid
())
{
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
string
errorMessage
=
"Error freeing device memory"
;
CHECK_RESULT
(
cuMemFree
(
buffers
[
i
].
getMemory
()));
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
CHECK_RESULT
(
cuMemFree
(
buffers
[
i
].
getMemory
()));
}
}
}
void
CudaParameterSet
::
getParameterValues
(
vector
<
vector
<
float
>
>&
values
)
{
template
<
class
T
>
void
CudaParameterSet
::
getParameterValues
(
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called getParameterValues() with vector of wrong type"
);
values
.
resize
(
numObjects
);
values
.
resize
(
numObjects
);
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
values
[
i
].
resize
(
numParameters
);
values
[
i
].
resize
(
numParameters
);
int
base
=
0
;
int
base
=
0
;
string
errorMessage
=
"Error downloading parameter set "
+
name
;
string
errorMessage
=
"Error downloading parameter set "
+
name
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
float4
>
data
(
numObjects
);
vector
<
T
>
data
(
4
*
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
4
*
j
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
4
*
j
+
1
]
;
if
(
base
+
2
<
numParameters
)
if
(
base
+
2
<
numParameters
)
values
[
j
][
base
+
2
]
=
data
[
j
].
z
;
values
[
j
][
base
+
2
]
=
data
[
4
*
j
+
2
]
;
if
(
base
+
3
<
numParameters
)
if
(
base
+
3
<
numParameters
)
values
[
j
][
base
+
3
]
=
data
[
j
].
w
;
values
[
j
][
base
+
3
]
=
data
[
4
*
j
+
3
]
;
}
}
base
+=
4
;
base
+=
4
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
float2
>
data
(
numObjects
);
vector
<
T
>
data
(
2
*
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
]
.
x
;
values
[
j
][
base
]
=
data
[
2
*
j
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
values
[
j
][
base
+
1
]
=
data
[
2
*
j
+
1
]
;
}
}
base
+=
2
;
base
+=
2
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
float
>
data
(
numObjects
);
vector
<
T
>
data
(
numObjects
);
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyDtoH
(
&
data
[
0
],
buffers
[
i
].
getMemory
(),
numObjects
*
buffers
[
i
].
getSize
()));
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
values
[
j
][
base
]
=
data
[
j
];
values
[
j
][
base
]
=
data
[
j
];
...
@@ -120,36 +126,39 @@ void CudaParameterSet::getParameterValues(vector<vector<float> >& values) {
...
@@ -120,36 +126,39 @@ void CudaParameterSet::getParameterValues(vector<vector<float> >& values) {
}
}
}
}
void
CudaParameterSet
::
setParameterValues
(
const
vector
<
vector
<
float
>
>&
values
)
{
template
<
class
T
>
void
CudaParameterSet
::
setParameterValues
(
const
vector
<
vector
<
T
>
>&
values
)
{
if
(
sizeof
(
T
)
!=
elementSize
)
throw
OpenMMException
(
"Called setParameterValues() with vector of wrong type"
);
int
base
=
0
;
int
base
=
0
;
string
errorMessage
=
"Error uploading parameter set "
+
name
;
string
errorMessage
=
"Error uploading parameter set "
+
name
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
get
Typ
e
()
==
"float4"
)
{
if
(
buffers
[
i
].
get
Siz
e
()
==
4
*
elementSize
)
{
vector
<
float4
>
data
(
numObjects
);
vector
<
T
>
data
(
4
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
4
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
4
*
j
+
1
]
=
values
[
j
][
base
+
1
];
if
(
base
+
2
<
numParameters
)
if
(
base
+
2
<
numParameters
)
data
[
j
].
z
=
values
[
j
][
base
+
2
];
data
[
4
*
j
+
2
]
=
values
[
j
][
base
+
2
];
if
(
base
+
3
<
numParameters
)
if
(
base
+
3
<
numParameters
)
data
[
j
].
w
=
values
[
j
][
base
+
3
];
data
[
4
*
j
+
3
]
=
values
[
j
][
base
+
3
];
}
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
4
;
base
+=
4
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float2"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
2
*
elementSize
)
{
vector
<
float2
>
data
(
numObjects
);
vector
<
T
>
data
(
2
*
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
]
.
x
=
values
[
j
][
base
];
data
[
2
*
j
]
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
data
[
2
*
j
+
1
]
=
values
[
j
][
base
+
1
];
}
}
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
base
+=
2
;
base
+=
2
;
}
}
else
if
(
buffers
[
i
].
get
Typ
e
()
==
"float"
)
{
else
if
(
buffers
[
i
].
get
Siz
e
()
==
elementSize
)
{
vector
<
float
>
data
(
numObjects
);
vector
<
T
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
values
[
j
][
base
];
data
[
j
]
=
values
[
j
][
base
];
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
CHECK_RESULT
(
cuMemcpyHtoD
(
buffers
[
i
].
getMemory
(),
&
data
[
0
],
numObjects
*
buffers
[
i
].
getSize
()));
...
@@ -164,16 +173,26 @@ string CudaParameterSet::getParameterSuffix(int index, const std::string& extraS
...
@@ -164,16 +173,26 @@ string CudaParameterSet::getParameterSuffix(int index, const std::string& extraS
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
,
".w"
};
const
string
suffixes
[]
=
{
".x"
,
".y"
,
".z"
,
".w"
};
int
buffer
=
-
1
;
int
buffer
=
-
1
;
for
(
int
i
=
0
;
buffer
==
-
1
&&
i
<
(
int
)
buffers
.
size
();
i
++
)
{
for
(
int
i
=
0
;
buffer
==
-
1
&&
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
index
*
sizeof
(
float
)
<
buffers
[
i
].
getSize
())
if
(
index
*
elementSize
<
buffers
[
i
].
getSize
())
buffer
=
i
;
buffer
=
i
;
else
else
index
-=
buffers
[
i
].
getSize
()
/
sizeof
(
float
)
;
index
-=
buffers
[
i
].
getSize
()
/
elementSize
;
}
}
if
(
buffer
==
-
1
)
if
(
buffer
==
-
1
)
throw
OpenMMException
(
"Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("
+
name
+
")"
);
throw
OpenMMException
(
"Internal error: Illegal argument to CudaParameterSet::getParameterSuffix() ("
+
name
+
")"
);
stringstream
suffix
;
stringstream
suffix
;
suffix
<<
(
buffer
+
1
)
<<
extraSuffix
;
suffix
<<
(
buffer
+
1
)
<<
extraSuffix
;
if
(
buffers
[
buffer
].
get
Typ
e
()
!=
"float"
)
if
(
buffers
[
buffer
].
get
Siz
e
()
!=
elementSize
)
suffix
<<
suffixes
[
index
];
suffix
<<
suffixes
[
index
];
return
suffix
.
str
();
return
suffix
.
str
();
}
}
/**
* Define template instantiations for float and double versions of getParameterValues() and setParameterValues().
*/
namespace
OpenMM
{
template
void
CudaParameterSet
::
getParameterValues
<
float
>(
vector
<
vector
<
float
>
>&
values
);
template
void
CudaParameterSet
::
setParameterValues
<
float
>(
const
vector
<
vector
<
float
>
>&
values
);
template
void
CudaParameterSet
::
getParameterValues
<
double
>(
vector
<
vector
<
double
>
>&
values
);
template
void
CudaParameterSet
::
setParameterValues
<
double
>(
const
vector
<
vector
<
double
>
>&
values
);
}
\ No newline at end of file
platforms/cuda2/src/CudaParameterSet.h
View file @
bd22eada
...
@@ -51,8 +51,9 @@ public:
...
@@ -51,8 +51,9 @@ public:
* @param name the name of the parameter set
* @param name the name of the parameter set
* @param bufferPerParameter if true, a separate buffer is created for each parameter. If false,
* @param bufferPerParameter if true, a separate buffer is created for each parameter. If false,
* multiple parameters may be combined into a single buffer.
* multiple parameters may be combined into a single buffer.
* @param useDoublePrecision whether values should be stored as single or double precision
*/
*/
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
);
CudaParameterSet
(
CudaContext
&
context
,
int
numParameters
,
int
numObjects
,
const
std
::
string
&
name
,
bool
bufferPerParameter
=
false
,
bool
useDoublePrecision
=
false
);
~
CudaParameterSet
();
~
CudaParameterSet
();
/**
/**
* Get the number of parameters.
* Get the number of parameters.
...
@@ -71,13 +72,15 @@ public:
...
@@ -71,13 +72,15 @@ public:
*
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
*/
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
float
>
>&
values
);
template
<
class
T
>
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
/**
* Set the values of all parameters.
* Set the values of all parameters.
*
*
* @param values values[i][j] contains the value of parameter j for object i
* @param values values[i][j] contains the value of parameter j for object i
*/
*/
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
float
>
>&
values
);
template
<
class
T
>
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
/**
* Get a set of CudaNonbondedUtilities::ParameterInfo objects which describe the Buffers
* Get a set of CudaNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
* containing the data.
...
@@ -95,8 +98,7 @@ public:
...
@@ -95,8 +98,7 @@ public:
std
::
string
getParameterSuffix
(
int
index
,
const
std
::
string
&
extraSuffix
=
""
)
const
;
std
::
string
getParameterSuffix
(
int
index
,
const
std
::
string
&
extraSuffix
=
""
)
const
;
private:
private:
CudaContext
&
context
;
CudaContext
&
context
;
int
numParameters
;
int
numParameters
,
numObjects
,
elementSize
;
int
numObjects
;
std
::
string
name
;
std
::
string
name
;
std
::
vector
<
CudaNonbondedUtilities
::
ParameterInfo
>
buffers
;
std
::
vector
<
CudaNonbondedUtilities
::
ParameterInfo
>
buffers
;
};
};
...
...
platforms/cuda2/src/kernels/customHbondForce.cu
0 → 100644
View file @
bd22eada
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline
__device__
real3
trim
(
real4
v
)
{
return
make_real3
(
v
.
x
,
v
.
y
,
v
.
z
);
}
/**
* This does nothing, and just exists to simply the code generation.
*/
inline
__device__
real3
trim
(
real3
v
)
{
return
v
;
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0.0
f
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
/**
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
deltaPeriodic
(
real4
vec1
,
real4
vec2
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0.0
f
);
#ifdef USE_PERIODIC
result
.
x
-=
floor
(
result
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
result
.
y
-=
floor
(
result
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
result
.
z
-=
floor
(
result
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
inline
__device__
real
computeAngle
(
real4
vec1
,
real4
vec2
)
{
real
dotProduct
=
vec1
.
x
*
vec2
.
x
+
vec1
.
y
*
vec2
.
y
+
vec1
.
z
*
vec2
.
z
;
real
cosine
=
dotProduct
*
RSQRT
(
vec1
.
w
*
vec2
.
w
);
real
angle
;
if
(
cosine
>
0.99
f
||
cosine
<
-
0.99
f
)
{
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3
crossProduct
=
cross
(
vec1
,
vec2
);
real
scale
=
vec1
.
w
*
vec2
.
w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct
,
crossProduct
)
/
scale
));
if
(
cosine
<
0.0
f
)
angle
=
M_PI
-
angle
;
}
else
angle
=
acos
(
cosine
);
return
angle
;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
computeCross
(
real4
vec1
,
real4
vec2
)
{
real3
result
=
cross
(
vec1
,
vec2
);
return
make_real4
(
result
.
x
,
result
.
y
,
result
.
z
,
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
);
}
/**
* Compute forces on donors.
*/
extern
"C"
__global__
void
computeDonorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
PARAMETER_ARGUMENTS
)
{
extern
__shared__
real4
posBuffer
[];
real
energy
=
0
;
real3
f1
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
for
(
int
donorStart
=
0
;
donorStart
<
NUM_DONORS
;
donorStart
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load information about the donor this thread will compute forces on.
int
donorIndex
=
donorStart
+
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int4
atoms
,
exclusionIndices
;
real4
d1
,
d2
,
d3
;
if
(
donorIndex
<
NUM_DONORS
)
{
atoms
=
donorAtoms
[
donorIndex
];
d1
=
(
atoms
.
x
>
-
1
?
posq
[
atoms
.
x
]
:
make_real4
(
0
));
d2
=
(
atoms
.
y
>
-
1
?
posq
[
atoms
.
y
]
:
make_real4
(
0
));
d3
=
(
atoms
.
z
>
-
1
?
posq
[
atoms
.
z
]
:
make_real4
(
0
));
#ifdef USE_EXCLUSIONS
exclusionIndices
=
exclusions
[
donorIndex
];
#endif
}
else
atoms
=
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
);
for
(
int
acceptorStart
=
0
;
acceptorStart
<
NUM_ACCEPTORS
;
acceptorStart
+=
blockDim
.
x
)
{
// Load the next block of acceptors into local memory.
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_ACCEPTORS
-
acceptorStart
);
if
(
threadIdx
.
x
<
blockSize
)
{
int4
atoms2
=
acceptorAtoms
[
acceptorStart
+
threadIdx
.
x
];
posBuffer
[
3
*
threadIdx
.
x
]
=
(
atoms2
.
x
>
-
1
?
posq
[
atoms2
.
x
]
:
make_real4
(
0
));
posBuffer
[
3
*
threadIdx
.
x
+
1
]
=
(
atoms2
.
y
>
-
1
?
posq
[
atoms2
.
y
]
:
make_real4
(
0
));
posBuffer
[
3
*
threadIdx
.
x
+
2
]
=
(
atoms2
.
z
>
-
1
?
posq
[
atoms2
.
z
]
:
make_real4
(
0
));
}
__syncthreads
();
if
(
donorIndex
<
NUM_DONORS
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
#ifdef USE_EXCLUSIONS
int
acceptorIndex
=
acceptorStart
+
index
;
if
(
acceptorIndex
==
exclusionIndices
.
x
||
acceptorIndex
==
exclusionIndices
.
y
||
acceptorIndex
==
exclusionIndices
.
z
||
acceptorIndex
==
exclusionIndices
.
w
)
continue
;
#endif
// Compute the interaction between a donor and an acceptor.
real4
a1
=
posBuffer
[
3
*
index
];
real4
a2
=
posBuffer
[
3
*
index
+
1
];
real4
a3
=
posBuffer
[
3
*
index
+
2
];
real4
deltaD1A1
=
deltaPeriodic
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
);
#ifdef USE_CUTOFF
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
#endif
COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
}
#endif
}
}
}
// Write results
if
(
donorIndex
<
NUM_DONORS
)
{
if
(
atoms
.
x
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
x
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
if
(
atoms
.
y
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
y
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
if
(
atoms
.
z
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
z
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
}
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
/**
* Compute forces on acceptors.
*/
extern
"C"
__global__
void
computeAcceptorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
PARAMETER_ARGUMENTS
)
{
extern
__shared__
real4
posBuffer
[];
real3
f1
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
for
(
int
acceptorStart
=
0
;
acceptorStart
<
NUM_ACCEPTORS
;
acceptorStart
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load information about the acceptor this thread will compute forces on.
int
acceptorIndex
=
acceptorStart
+
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int4
atoms
,
exclusionIndices
;
real4
a1
,
a2
,
a3
;
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
atoms
=
acceptorAtoms
[
acceptorIndex
];
a1
=
(
atoms
.
x
>
-
1
?
posq
[
atoms
.
x
]
:
make_real4
(
0
));
a2
=
(
atoms
.
y
>
-
1
?
posq
[
atoms
.
y
]
:
make_real4
(
0
));
a3
=
(
atoms
.
z
>
-
1
?
posq
[
atoms
.
z
]
:
make_real4
(
0
));
#ifdef USE_EXCLUSIONS
exclusionIndices
=
exclusions
[
acceptorIndex
];
#endif
}
else
atoms
=
make_int4
(
-
1
,
-
1
,
-
1
,
-
1
);
for
(
int
donorStart
=
0
;
donorStart
<
NUM_DONORS
;
donorStart
+=
blockDim
.
x
)
{
// Load the next block of donors into local memory.
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_DONORS
-
donorStart
);
if
(
threadIdx
.
x
<
blockSize
)
{
int4
atoms2
=
donorAtoms
[
donorStart
+
threadIdx
.
x
];
posBuffer
[
3
*
threadIdx
.
x
]
=
(
atoms2
.
x
>
-
1
?
posq
[
atoms2
.
x
]
:
make_real4
(
0
));
posBuffer
[
3
*
threadIdx
.
x
+
1
]
=
(
atoms2
.
y
>
-
1
?
posq
[
atoms2
.
y
]
:
make_real4
(
0
));
posBuffer
[
3
*
threadIdx
.
x
+
2
]
=
(
atoms2
.
z
>
-
1
?
posq
[
atoms2
.
z
]
:
make_real4
(
0
));
}
__syncthreads
();
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
#ifdef USE_EXCLUSIONS
int
donorIndex
=
donorStart
+
index
;
if
(
donorIndex
==
exclusionIndices
.
x
||
donorIndex
==
exclusionIndices
.
y
||
donorIndex
==
exclusionIndices
.
z
||
donorIndex
==
exclusionIndices
.
w
)
continue
;
#endif
// Compute the interaction between a donor and an acceptor.
real4
d1
=
posBuffer
[
3
*
index
];
real4
d2
=
posBuffer
[
3
*
index
+
1
];
real4
d3
=
posBuffer
[
3
*
index
+
2
];
real4
deltaD1A1
=
deltaPeriodic
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
);
#ifdef USE_CUTOFF
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
#endif
COMPUTE_ACCEPTOR_FORCE
#ifdef USE_CUTOFF
}
#endif
}
}
}
// Write results
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
if
(
atoms
.
x
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
x
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f1
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
if
(
atoms
.
y
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
y
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f2
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
if
(
atoms
.
z
>
-
1
)
{
atomicAdd
(
&
force
[
atoms
.
z
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f3
.
z
*
0xFFFFFFFF
)));
__threadfence_block
();
}
}
}
}
platforms/cuda2/src/kernels/customIntegrator.cu
0 → 100644
View file @
bd22eada
extern
"C"
__global__
void
computeSum
(
const
real
*
__restrict__
sumBuffer
,
real
*
result
)
{
__shared__
real
tempBuffer
[
WORK_GROUP_SIZE
];
const
unsigned
int
thread
=
threadIdx
.
x
;
real
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
SUM_BUFFER_SIZE
;
index
+=
blockDim
.
x
)
sum
+=
sumBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
WORK_GROUP_SIZE
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
WORK_GROUP_SIZE
)
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
}
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posDelta
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
position
=
posq
[
index
];
position
.
x
+=
posDelta
[
index
].
x
;
position
.
y
+=
posDelta
[
index
].
y
;
position
.
z
+=
posDelta
[
index
].
z
;
posq
[
index
]
=
position
;
posDelta
[
index
]
=
make_real4
(
0
,
0
,
0
,
0
);
}
}
extern
"C"
__global__
void
generateRandomNumbers
(
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
uint4
state
=
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
];
unsigned
int
carry
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Generate three uniform random numbers.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
unsigned
int
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
unsigned
int
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x1
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x2
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x3
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
// Record the values.
random
[
index
]
=
make_float4
(
x1
,
x2
,
x3
,
0.0
f
);
}
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
state
;
}
platforms/cuda2/src/kernels/customIntegratorGlobal.cu
0 → 100644
View file @
bd22eada
extern
"C"
__global__
void
computeGlobal
(
real2
*
__restrict__
dt
,
real
*
__restrict__
globals
,
real
*
__restrict__
params
,
float
uniform
,
float
gaussian
,
const
real
*
__restrict__
energy
)
{
COMPUTE_STEP
}
platforms/cuda2/src/kernels/customIntegratorPerDof.cu
0 → 100644
View file @
bd22eada
inline
__device__
double4
convertToDouble4
(
real4
a
)
{
return
make_double4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
}
inline
__device__
real4
convertFromDouble4
(
double4
a
)
{
return
make_real4
(
a
.
x
,
a
.
y
,
a
.
z
,
a
.
w
);
}
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posDelta
,
real4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
real2
*
__restrict__
dt
,
const
real
*
__restrict__
globals
,
const
real
*
__restrict__
params
,
real
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
randomIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
PARAMETER_ARGUMENTS
)
{
real
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
const
double
forceScale
=
1.0
/
0xFFFFFFFF
;
while
(
index
<
NUM_ATOMS
)
{
#ifdef LOAD_POS_AS_DELTA
double4
position
=
convertToDouble4
(
posq
[
index
]
+
posDelta
[
index
]);
#else
double4
position
=
convertToDouble4
(
posq
[
index
]);
#endif
double4
velocity
=
convertToDouble4
(
velm
[
index
]);
double4
f
=
make_double4
(
forceScale
*
force
[
index
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
],
forceScale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
],
0.0
);
double
mass
=
1.0
/
velocity
.
w
;
if
(
velocity
.
w
!=
0.0
)
{
float4
gaussian
=
gaussianValues
[
randomIndex
];
float4
uniform
=
uniformValues
[
index
];
COMPUTE_STEP
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/cuda2/src/kernels/customNonbonded.cu
0 → 100644
View file @
bd22eada
#ifdef USE_CUTOFF
if
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
{
#else
if
(
!
isExcluded
)
{
#endif
real
tempForce
=
0
;
COMPUTE_FORCE
dEdR
+=
tempForce
*
invR
;
}
platforms/cuda2/tests/TestCudaCustomHbondForce.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomHbondForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomHbondForce.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testHbond
()
{
CudaPlatform
platform
;
// Create a system using a CustomHbondForce.
System
customSystem
;
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"0.5*kr*(distance(d1,a1)-r0)^2 + 0.5*ktheta*(angle(a1,d1,d2)-theta0)^2 + 0.5*kpsi*(angle(d1,a1,a2)-psi0)^2 + kchi*(1+cos(n*dihedral(a3,a2,a1,d1)-chi0))"
);
custom
->
addPerDonorParameter
(
"r0"
);
custom
->
addPerDonorParameter
(
"theta0"
);
custom
->
addPerDonorParameter
(
"psi0"
);
custom
->
addPerAcceptorParameter
(
"chi0"
);
custom
->
addPerAcceptorParameter
(
"n"
);
custom
->
addGlobalParameter
(
"kr"
,
0.4
);
custom
->
addGlobalParameter
(
"ktheta"
,
0.5
);
custom
->
addGlobalParameter
(
"kpsi"
,
0.6
);
custom
->
addGlobalParameter
(
"kchi"
,
0.7
);
vector
<
double
>
parameters
(
3
);
parameters
[
0
]
=
1.5
;
parameters
[
1
]
=
1.7
;
parameters
[
2
]
=
1.9
;
custom
->
addDonor
(
1
,
0
,
-
1
,
parameters
);
parameters
.
resize
(
2
);
parameters
[
0
]
=
2.1
;
parameters
[
1
]
=
2
;
custom
->
addAcceptor
(
2
,
3
,
4
,
parameters
);
custom
->
setCutoffDistance
(
10.0
);
customSystem
.
addForce
(
custom
);
// Create an identical system using HarmonicBondForce, HarmonicAngleForce, and PeriodicTorsionForce.
System
standardSystem
;
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
standardSystem
.
addParticle
(
1.0
);
HarmonicBondForce
*
bond
=
new
HarmonicBondForce
();
bond
->
addBond
(
1
,
2
,
1.5
,
0.4
);
standardSystem
.
addForce
(
bond
);
HarmonicAngleForce
*
angle
=
new
HarmonicAngleForce
();
angle
->
addAngle
(
0
,
1
,
2
,
1.7
,
0.5
);
angle
->
addAngle
(
1
,
2
,
3
,
1.9
,
0.6
);
standardSystem
.
addForce
(
angle
);
PeriodicTorsionForce
*
torsion
=
new
PeriodicTorsionForce
();
torsion
->
addTorsion
(
1
,
2
,
3
,
4
,
2
,
2.1
,
0.7
);
standardSystem
.
addForce
(
torsion
);
// Set the atoms in various positions, and verify that both systems give identical forces and energy.
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
5
);
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
c1
(
customSystem
,
integrator1
,
platform
);
Context
c2
(
standardSystem
,
integrator2
,
platform
);
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
positions
.
size
();
j
++
)
positions
[
j
]
=
Vec3
(
2.0
*
genrand_real2
(
sfmt
),
2.0
*
genrand_real2
(
sfmt
),
2.0
*
genrand_real2
(
sfmt
));
c1
.
setPositions
(
positions
);
c2
.
setPositions
(
positions
);
State
s1
=
c1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
s2
=
c2
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
customSystem
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
s2
.
getForces
()[
i
],
s1
.
getForces
()[
i
],
TOL
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
s1
.
getPotentialEnergy
(),
TOL
);
}
// Try changing the parameters and make sure it's still correct.
parameters
.
resize
(
3
);
parameters
[
0
]
=
1.4
;
parameters
[
1
]
=
1.7
;
parameters
[
2
]
=
1.9
;
custom
->
setDonorParameters
(
0
,
1
,
0
,
-
1
,
parameters
);
parameters
.
resize
(
2
);
parameters
[
0
]
=
2.2
;
parameters
[
1
]
=
2
;
custom
->
setAcceptorParameters
(
0
,
2
,
3
,
4
,
parameters
);
bond
->
setBondParameters
(
0
,
1
,
2
,
1.4
,
0.4
);
torsion
->
setTorsionParameters
(
0
,
1
,
2
,
3
,
4
,
2
,
2.2
,
0.7
);
custom
->
updateParametersInContext
(
c1
);
bond
->
updateParametersInContext
(
c2
);
torsion
->
updateParametersInContext
(
c2
);
State
s1
=
c1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
s2
=
c2
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
customSystem
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
s2
.
getForces
()[
i
],
s1
.
getForces
()[
i
],
TOL
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
s1
.
getPotentialEnergy
(),
TOL
);
}
void
testExclusions
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"(distance(d1,a1)-1)^2"
);
custom
->
addDonor
(
0
,
1
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addExclusion
(
1
,
0
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
2
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
2
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCutoff
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"(distance(d1,a1)-1)^2"
);
custom
->
addDonor
(
0
,
1
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
setNonbondedMethod
(
CustomHbondForce
::
CutoffNonPeriodic
);
custom
->
setCutoffDistance
(
2.5
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
3
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
2
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
2
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCustomFunctions
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomHbondForce
*
custom
=
new
CustomHbondForce
(
"foo(distance(d1,a1))"
);
custom
->
addDonor
(
1
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addDonor
(
2
,
0
,
-
1
,
vector
<
double
>
());
custom
->
addAcceptor
(
0
,
1
,
-
1
,
vector
<
double
>
());
vector
<
double
>
function
(
2
);
function
[
0
]
=
0
;
function
[
1
]
=
1
;
custom
->
addFunction
(
"foo"
,
function
,
0
,
10
);
system
.
addForce
(
custom
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0.1
,
0.1
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
0.1
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.1
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
0.1
*
2
+
0.1
*
2
,
state
.
getPotentialEnergy
(),
TOL
);
}
int
main
()
{
try
{
testHbond
();
testExclusions
();
testCutoff
();
testCustomFunctions
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cuda2/tests/TestCudaCustomIntegrator.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CustomIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/CustomIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
/**
* Test a simple leapfrog integrator on a single bond.
*/
void
testSingleBond
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
CustomIntegrator
integrator
(
0.01
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
1.5
,
1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// This is simply a harmonic oscillator, so compare it to the analytical solution.
const
double
freq
=
1.0
;;
State
state
=
context
.
getState
(
State
::
Energy
);
const
double
initialEnergy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Energy
);
double
time
=
state
.
getTime
();
double
expectedDist
=
1.5
+
0.5
*
std
::
cos
(
freq
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
1
],
0.02
);
double
expectedSpeed
=
-
0.5
*
freq
*
std
::
sin
(
freq
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
1
],
0.02
);
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
/**
* Test an integrator that enforces constraints.
*/
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
double
temp
=
500.0
;
CudaPlatform
platform
;
System
system
;
CustomIntegrator
integrator
(
0.002
);
integrator
.
addPerDofVariable
(
"oldx"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"oldx"
,
"x"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addConstrainPositions
();
integrator
.
addComputePerDof
(
"v"
,
"(x-oldx)/dt"
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
i
%
2
==
0
?
5.0
:
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
for
(
int
i
=
0
;
i
<
numParticles
-
1
;
++
i
)
system
.
addConstraint
(
i
,
i
+
1
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Energy
);
for
(
int
j
=
0
;
j
<
system
.
getNumConstraints
();
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2e-5
);
}
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
if
(
i
==
1
)
initialEnergy
=
energy
;
else
if
(
i
>
1
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
/**
* Test an integrator that applies constraints directly to velocities.
*/
void
testVelocityConstraints
()
{
const
int
numParticles
=
10
;
CudaPlatform
platform
;
System
system
;
CustomIntegrator
integrator
(
0.002
);
integrator
.
addPerDofVariable
(
"x1"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addComputePerDof
(
"x1"
,
"x"
);
integrator
.
addConstrainPositions
();
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*dt*f/m+(x-x1)/dt"
);
integrator
.
addConstrainVelocities
();
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
i
%
2
==
0
?
5.0
:
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
// Constrain the first three particles with SHAKE.
system
.
addConstraint
(
0
,
1
,
1.0
);
system
.
addConstraint
(
1
,
2
,
1.0
);
// Constrain the next three with SETTLE.
system
.
addConstraint
(
3
,
4
,
1.0
);
system
.
addConstraint
(
5
,
4
,
1.0
);
system
.
addConstraint
(
3
,
5
,
sqrt
(
2.0
));
// Constraint the rest with CCMA.
for
(
int
i
=
6
;
i
<
numParticles
-
1
;
++
i
)
system
.
addConstraint
(
i
,
i
+
1
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
integrator
.
step
(
2
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Energy
);
for
(
int
j
=
0
;
j
<
system
.
getNumConstraints
();
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2e-5
);
if
(
i
>
0
)
{
Vec3
v1
=
state
.
getVelocities
()[
particle1
];
Vec3
v2
=
state
.
getVelocities
()[
particle2
];
double
vel
=
(
v1
-
v2
).
dot
(
p1
-
p2
);
ASSERT_EQUAL_TOL
(
0.0
,
vel
,
2e-5
);
}
}
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
if
(
i
==
0
)
initialEnergy
=
energy
;
else
if
(
i
>
0
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
}
}
/**
* Test an integrator with an AndersenThermostat to see if updateContextState()
* is being handled correctly.
*/
void
testWithThermostat
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
const
int
numSteps
=
10000
;
CudaPlatform
platform
;
System
system
;
CustomIntegrator
integrator
(
0.005
);
integrator
.
addUpdateContextState
();
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
AndersenThermostat
*
thermostat
=
new
AndersenThermostat
(
temp
,
collisionFreq
);
system
.
addForce
(
thermostat
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the temperature is correct.
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
numSteps
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
ke
+=
state
.
getKineticEnergy
();
integrator
.
step
(
1
);
}
ke
/=
numSteps
;
double
expected
=
0.5
*
numParticles
*
3
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
ke
,
6
/
std
::
sqrt
((
double
)
numSteps
));
}
/**
* Test a Monte Carlo integrator that uses global variables and depends on energy.
*/
void
testMonteCarlo
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
const
double
kT
=
BOLTZ
*
300.0
;
integrator
.
addGlobalVariable
(
"kT"
,
kT
);
integrator
.
addGlobalVariable
(
"oldE"
,
0
);
integrator
.
addGlobalVariable
(
"accept"
,
0
);
integrator
.
addPerDofVariable
(
"oldx"
,
0
);
integrator
.
addComputeGlobal
(
"oldE"
,
"energy"
);
integrator
.
addComputePerDof
(
"oldx"
,
"x"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*gaussian"
);
integrator
.
addComputeGlobal
(
"accept"
,
"step(exp((oldE-energy)/kT)-uniform)"
);
integrator
.
addComputePerDof
(
"x"
,
"accept*x + (1-accept)*oldx"
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
2.0
,
10.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// Compute the histogram of distances and see if it satisfies a Boltzmann distribution.
const
int
numBins
=
100
;
const
double
maxDist
=
4.0
;
const
int
numIterations
=
5000
;
vector
<
int
>
counts
(
numBins
,
0
);
for
(
int
i
=
0
;
i
<
numIterations
;
++
i
)
{
integrator
.
step
(
10
);
State
state
=
context
.
getState
(
State
::
Positions
);
Vec3
delta
=
state
.
getPositions
()[
0
]
-
state
.
getPositions
()[
1
];
double
dist
=
sqrt
(
delta
.
dot
(
delta
));
if
(
dist
<
maxDist
)
counts
[(
int
)
(
numBins
*
dist
/
maxDist
)]
++
;
}
vector
<
double
>
expected
(
numBins
,
0
);
double
sum
=
0
;
for
(
int
i
=
0
;
i
<
numBins
;
i
++
)
{
double
dist
=
(
i
+
0.5
)
*
maxDist
/
numBins
;
expected
[
i
]
=
dist
*
dist
*
exp
(
-
5.0
*
(
dist
-
2
)
*
(
dist
-
2
)
/
kT
);
sum
+=
expected
[
i
];
}
for
(
int
i
=
0
;
i
<
numBins
;
i
++
)
ASSERT_USUALLY_EQUAL_TOL
((
double
)
counts
[
i
]
/
numIterations
,
expected
[
i
]
/
sum
,
0.01
);
}
/**
* Test the ComputeSum operation.
*/
void
testSum
()
{
const
int
numParticles
=
200
;
const
double
boxSize
=
10
;
CudaPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
NonbondedForce
*
nb
=
new
NonbondedForce
();
system
.
addForce
(
nb
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.5
);
nb
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
0.1
,
1
);
bool
close
=
true
;
while
(
close
)
{
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
close
=
false
;
for
(
int
j
=
0
;
j
<
i
;
++
j
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
delta
.
dot
(
delta
)
<
0.1
)
close
=
true
;
}
}
}
CustomIntegrator
integrator
(
0.01
);
integrator
.
addGlobalVariable
(
"ke"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addComputeSum
(
"ke"
,
"m*v*v/2"
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// See if the sum is being computed correctly.
State
state
=
context
.
getState
(
State
::
Energy
);
const
double
initialEnergy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
for
(
int
i
=
0
;
i
<
100
;
++
i
)
{
state
=
context
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state
.
getKineticEnergy
(),
integrator
.
getGlobalVariable
(
0
),
1e-5
);
integrator
.
step
(
1
);
}
}
/**
* Test an integrator that both uses and modifies a context parameter.
*/
void
testParameter
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
AndersenThermostat
*
thermostat
=
new
AndersenThermostat
(
0.1
,
0.1
);
system
.
addForce
(
thermostat
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addGlobalVariable
(
"temp"
,
0
);
integrator
.
addComputeGlobal
(
"temp"
,
"AndersenTemperature"
);
integrator
.
addComputeGlobal
(
"AndersenTemperature"
,
"temp*2"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the parameter is being used correctly.
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
integrator
.
step
(
1
);
ASSERT_EQUAL_TOL
(
context
.
getParameter
(
"AndersenTemperature"
),
0.1
*
(
1
<<
(
i
+
1
)),
1e-5
);
}
}
/**
* Test random number distributions.
*/
void
testRandomDistributions
()
{
const
int
numParticles
=
100
;
const
int
numBins
=
20
;
const
int
numSteps
=
100
;
CudaPlatform
platform
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomIntegrator
integrator
(
0.1
);
integrator
.
addPerDofVariable
(
"a"
,
0
);
integrator
.
addPerDofVariable
(
"b"
,
0
);
integrator
.
addComputePerDof
(
"a"
,
"uniform"
);
integrator
.
addComputePerDof
(
"b"
,
"gaussian"
);
Context
context
(
system
,
integrator
,
platform
);
// See if the random numbers are distributed correctly.
vector
<
int
>
bins
(
numBins
);
double
mean
=
0.0
;
double
var
=
0.0
;
double
skew
=
0.0
;
double
kurtosis
=
0.0
;
vector
<
Vec3
>
values
;
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integrator
.
step
(
1
);
integrator
.
getPerDofVariable
(
0
,
values
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v
=
values
[
i
][
j
];
ASSERT
(
v
>=
0
&&
v
<
1
);
bins
[(
int
)
(
v
*
numBins
)]
++
;
}
integrator
.
getPerDofVariable
(
1
,
values
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
double
v
=
values
[
i
][
j
];
mean
+=
v
;
var
+=
v
*
v
;
skew
+=
v
*
v
*
v
;
kurtosis
+=
v
*
v
*
v
*
v
;
}
}
// Check the distribution of uniform randoms.
int
numValues
=
numParticles
*
numSteps
*
3
;
double
expected
=
numValues
/
(
double
)
numBins
;
double
tol
=
4
*
sqrt
(
expected
);
for
(
int
i
=
0
;
i
<
numBins
;
i
++
)
ASSERT
(
bins
[
i
]
>=
expected
-
tol
&&
bins
[
i
]
<=
expected
+
tol
);
// Check the distribution of gaussian randoms.
mean
/=
numValues
;
var
/=
numValues
;
skew
/=
numValues
;
kurtosis
/=
numValues
;
double
c2
=
var
-
mean
*
mean
;
double
c3
=
skew
-
3
*
var
*
mean
+
2
*
mean
*
mean
*
mean
;
double
c4
=
kurtosis
-
4
*
skew
*
mean
-
3
*
var
*
var
+
12
*
var
*
mean
*
mean
-
6
*
mean
*
mean
*
mean
*
mean
;
ASSERT_EQUAL_TOL
(
0.0
,
mean
,
3.0
/
sqrt
((
double
)
numValues
));
ASSERT_EQUAL_TOL
(
1.0
,
c2
,
3.0
/
pow
(
numValues
,
1.0
/
3.0
));
ASSERT_EQUAL_TOL
(
0.0
,
c3
,
3.0
/
pow
(
numValues
,
1.0
/
4.0
));
ASSERT_EQUAL_TOL
(
0.0
,
c4
,
3.0
/
pow
(
numValues
,
1.0
/
4.0
));
}
/**
* Test getting and setting per-DOF variables.
*/
void
testPerDofVariables
()
{
const
int
numParticles
=
200
;
const
double
boxSize
=
10
;
CudaPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
NonbondedForce
*
nb
=
new
NonbondedForce
();
system
.
addForce
(
nb
);
nb
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.5
);
nb
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
0.1
,
1
);
bool
close
=
true
;
while
(
close
)
{
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
close
=
false
;
for
(
int
j
=
0
;
j
<
i
;
++
j
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
delta
.
dot
(
delta
)
<
0.1
)
close
=
true
;
}
}
}
CustomIntegrator
integrator
(
0.01
);
integrator
.
addPerDofVariable
(
"temp"
,
0
);
integrator
.
addPerDofVariable
(
"pos"
,
0
);
integrator
.
addComputePerDof
(
"v"
,
"v+dt*f/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*v"
);
integrator
.
addComputePerDof
(
"pos"
,
"x"
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
vector
<
Vec3
>
initialValues
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
initialValues
[
i
]
=
Vec3
(
i
+
0.1
,
i
+
0.2
,
i
+
0.3
);
integrator
.
setPerDofVariable
(
0
,
initialValues
);
// Run a simulation, then query per-DOF values and see if they are correct.
vector
<
Vec3
>
values
;
context
.
getState
(
State
::
Forces
);
// Cause atom reordering to happen before the first step
for
(
int
i
=
0
;
i
<
200
;
++
i
)
{
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
);
integrator
.
getPerDofVariable
(
0
,
values
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
ASSERT_EQUAL_VEC
(
initialValues
[
j
],
values
[
j
],
1e-5
);
integrator
.
getPerDofVariable
(
1
,
values
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
ASSERT_EQUAL_VEC
(
state
.
getPositions
()[
j
],
values
[
j
],
1e-5
);
}
}
/**
* Test evaluating force groups separately.
*/
void
testForceGroups
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
CustomIntegrator
integrator
(
0.01
);
integrator
.
addPerDofVariable
(
"outf"
,
0
);
integrator
.
addPerDofVariable
(
"outf1"
,
0
);
integrator
.
addPerDofVariable
(
"outf2"
,
0
);
integrator
.
addGlobalVariable
(
"oute"
,
0
);
integrator
.
addGlobalVariable
(
"oute1"
,
0
);
integrator
.
addGlobalVariable
(
"oute2"
,
0
);
integrator
.
addComputePerDof
(
"outf"
,
"f"
);
integrator
.
addComputePerDof
(
"outf1"
,
"f1"
);
integrator
.
addComputePerDof
(
"outf2"
,
"f2"
);
integrator
.
addComputeGlobal
(
"oute"
,
"energy"
);
integrator
.
addComputeGlobal
(
"oute1"
,
"energy1"
);
integrator
.
addComputeGlobal
(
"oute2"
,
"energy2"
);
HarmonicBondForce
*
bonds
=
new
HarmonicBondForce
();
bonds
->
addBond
(
0
,
1
,
1.5
,
1.1
);
bonds
->
setForceGroup
(
1
);
system
.
addForce
(
bonds
);
NonbondedForce
*
nb
=
new
NonbondedForce
();
nb
->
addParticle
(
0.2
,
1
,
0
);
nb
->
addParticle
(
0.2
,
1
,
0
);
nb
->
setForceGroup
(
2
);
system
.
addForce
(
nb
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// See if the various forces are computed correctly.
integrator
.
step
(
1
);
vector
<
Vec3
>
f
,
f1
,
f2
;
double
e1
=
0.5
*
1.1
*
0.5
*
0.5
;
double
e2
=
138.935456
*
0.2
*
0.2
/
2.0
;
integrator
.
getPerDofVariable
(
0
,
f
);
integrator
.
getPerDofVariable
(
1
,
f1
);
integrator
.
getPerDofVariable
(
2
,
f2
);
ASSERT_EQUAL_VEC
(
Vec3
(
1.1
*
0.5
,
0
,
0
),
f1
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
1.1
*
0.5
,
0
,
0
),
f1
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
138.935456
*
0.2
*
0.2
/
4.0
,
0
,
0
),
f2
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
Vec3
(
138.935456
*
0.2
*
0.2
/
4.0
,
0
,
0
),
f2
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
0
]
+
f2
[
0
],
f
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
1
]
+
f2
[
1
],
f
[
1
],
1e-5
);
ASSERT_EQUAL_TOL
(
e1
,
integrator
.
getGlobalVariable
(
1
),
1e-5
);
ASSERT_EQUAL_TOL
(
e2
,
integrator
.
getGlobalVariable
(
2
),
1e-5
);
ASSERT_EQUAL_TOL
(
e1
+
e2
,
integrator
.
getGlobalVariable
(
0
),
1e-5
);
// Make sure they also match the values returned by the Context.
State
s
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
);
State
s1
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
,
2
);
State
s2
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
,
false
,
4
);
vector
<
Vec3
>
c
,
c1
,
c2
;
c
=
context
.
getState
(
State
::
Forces
,
false
).
getForces
();
c1
=
context
.
getState
(
State
::
Forces
,
false
,
2
).
getForces
();
c2
=
context
.
getState
(
State
::
Forces
,
false
,
4
).
getForces
();
ASSERT_EQUAL_VEC
(
f
[
0
],
c
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f
[
1
],
c
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
0
],
c1
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f1
[
1
],
c1
[
1
],
1e-5
);
ASSERT_EQUAL_VEC
(
f2
[
0
],
c2
[
0
],
1e-5
);
ASSERT_EQUAL_VEC
(
f2
[
1
],
c2
[
1
],
1e-5
);
ASSERT_EQUAL_TOL
(
s
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
0
),
1e-5
);
ASSERT_EQUAL_TOL
(
s1
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
1
),
1e-5
);
ASSERT_EQUAL_TOL
(
s2
.
getPotentialEnergy
(),
integrator
.
getGlobalVariable
(
2
),
1e-5
);
}
/**
* Test a multiple time step r-RESPA integrator.
*/
void
testRespa
()
{
const
int
numParticles
=
8
;
CudaPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
4
,
0
,
0
),
Vec3
(
0
,
4
,
0
),
Vec3
(
0
,
0
,
4
));
CustomIntegrator
integrator
(
0.002
);
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*dt*f1/m"
);
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*(dt/2)*f0/m"
);
integrator
.
addComputePerDof
(
"x"
,
"x+(dt/2)*v"
);
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*(dt/2)*f0/m"
);
}
integrator
.
addComputePerDof
(
"v"
,
"v+0.5*dt*f1/m"
);
HarmonicBondForce
*
bonds
=
new
HarmonicBondForce
();
for
(
int
i
=
0
;
i
<
numParticles
-
2
;
i
++
)
bonds
->
addBond
(
i
,
i
+
1
,
1.0
,
0.5
);
system
.
addForce
(
bonds
);
NonbondedForce
*
nb
=
new
NonbondedForce
();
nb
->
setCutoffDistance
(
2.0
);
nb
->
setNonbondedMethod
(
NonbondedForce
::
Ewald
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
i
%
2
==
0
?
5.0
:
10.0
);
nb
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
nb
->
setForceGroup
(
1
);
nb
->
setReciprocalSpaceForceGroup
(
0
);
system
.
addForce
(
nb
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and monitor energy conservations.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
if
(
i
==
1
)
initialEnergy
=
energy
;
else
if
(
i
>
1
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.05
);
integrator
.
step
(
2
);
}
}
int
main
()
{
try
{
testSingleBond
();
testConstraints
();
testVelocityConstraints
();
testWithThermostat
();
testMonteCarlo
();
testSum
();
testParameter
();
testRandomDistributions
();
testPerDofVariables
();
testForceGroups
();
testRespa
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
0 → 100644
View file @
bd22eada
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests all the different force terms in the CUDA implementation of CustomNonbondedForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "sfmt/SFMT.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testSimpleExpression
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"-0.1*r^3"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
force
=
0.1
*
3
*
(
2
*
2
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
-
0.1
*
(
2
*
2
*
2
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testParameters
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"scale*a*(r*b)^3; a=a1*a2; b=c+b1+b2"
);
forceField
->
addPerParticleParameter
(
"a"
);
forceField
->
addPerParticleParameter
(
"b"
);
forceField
->
addGlobalParameter
(
"scale"
,
3.0
);
forceField
->
addGlobalParameter
(
"c"
,
-
1.0
);
vector
<
double
>
params
(
2
);
params
[
0
]
=
1.5
;
params
[
1
]
=
2.0
;
forceField
->
addParticle
(
params
);
params
[
0
]
=
2.0
;
params
[
1
]
=
3.0
;
forceField
->
addParticle
(
params
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
context
.
setParameter
(
"scale"
,
1.0
);
context
.
setParameter
(
"c"
,
0.0
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
vector
<
Vec3
>
forces
=
state
.
getForces
();
double
force
=
-
3.0
*
3
*
5.0
*
(
10
*
10
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
3.0
*
(
10
*
10
*
10
),
state
.
getPotentialEnergy
(),
TOL
);
// Try changing the global parameters and make sure it's still correct.
context
.
setParameter
(
"scale"
,
1.5
);
context
.
setParameter
(
"c"
,
1.0
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
force
=
-
1.5
*
3.0
*
3
*
6.0
*
(
12
*
12
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
1.5
*
3.0
*
(
12
*
12
*
12
),
state
.
getPotentialEnergy
(),
TOL
);
// Try changing the per-particle parameters and make sure it's still correct.
params
[
0
]
=
1.6
;
params
[
1
]
=
2.1
;
forceField
->
setParticleParameters
(
0
,
params
);
params
[
0
]
=
1.9
;
params
[
1
]
=
2.8
;
forceField
->
setParticleParameters
(
1
,
params
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
force
=
-
1.5
*
1.6
*
1.9
*
3
*
5.9
*
(
11.8
*
11.8
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
1.5
*
1.6
*
1.9
*
(
11.8
*
11.8
*
11.8
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testManyParameters
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"(a1*a2+b1*b2+c1*c2+d1*d2+e1*e2)*r"
);
forceField
->
addPerParticleParameter
(
"a"
);
forceField
->
addPerParticleParameter
(
"b"
);
forceField
->
addPerParticleParameter
(
"c"
);
forceField
->
addPerParticleParameter
(
"d"
);
forceField
->
addPerParticleParameter
(
"e"
);
vector
<
double
>
params
(
5
);
params
[
0
]
=
1.0
;
params
[
1
]
=
2.0
;
params
[
2
]
=
3.0
;
params
[
3
]
=
4.0
;
params
[
4
]
=
5.0
;
forceField
->
addParticle
(
params
);
params
[
0
]
=
1.1
;
params
[
1
]
=
1.2
;
params
[
2
]
=
1.3
;
params
[
3
]
=
1.4
;
params
[
4
]
=
1.5
;
forceField
->
addParticle
(
params
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
vector
<
Vec3
>
forces
=
state
.
getForces
();
double
force
=
1
*
1.1
+
2
*
1.2
+
3
*
1.3
+
4
*
1.4
+
5
*
1.5
;
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
2
*
force
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testExclusions
()
{
CudaPlatform
platform
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"a*r; a=a1+a2"
);
nonbonded
->
addPerParticleParameter
(
"a"
);
vector
<
double
>
params
(
1
);
vector
<
Vec3
>
positions
(
4
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
system
.
addParticle
(
1.0
);
params
[
0
]
=
i
+
1
;
nonbonded
->
addParticle
(
params
);
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
}
nonbonded
->
addExclusion
(
0
,
1
);
nonbonded
->
addExclusion
(
1
,
2
);
nonbonded
->
addExclusion
(
2
,
3
);
nonbonded
->
addExclusion
(
0
,
2
);
nonbonded
->
addExclusion
(
1
,
3
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
1
+
4
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
(
1
+
4
),
0
,
0
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_TOL
((
1
+
4
)
*
3.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCutoff
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"r"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffNonPeriodic
);
forceField
->
setCutoffDistance
(
2.5
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
0
,
3
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
1
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
1
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
2.0
+
1.0
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testPeriodic
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"r"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
forceField
->
setCutoffDistance
(
2.0
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
4
,
0
,
0
),
Vec3
(
0
,
4
,
0
),
Vec3
(
0
,
0
,
4
));
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2.1
,
0
);
positions
[
2
]
=
Vec3
(
0
,
3
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
2
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
2
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testTabulatedFunction
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"fn(r)+1"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
vector
<
double
>
table
;
for
(
int
i
=
0
;
i
<
21
;
i
++
)
table
.
push_back
(
std
::
sin
(
0.25
*
i
));
forceField
->
addFunction
(
"fn"
,
table
,
1.0
,
6.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
double
tol
=
0.01
;
for
(
int
i
=
1
;
i
<
30
;
i
++
)
{
double
x
=
(
7.0
/
30.0
)
*
i
;
positions
[
1
]
=
Vec3
(
x
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
force
=
(
x
<
1.0
||
x
>
6.0
?
0.0
:
-
std
::
cos
(
x
-
1.0
));
double
energy
=
(
x
<
1.0
||
x
>
6.0
?
0.0
:
std
::
sin
(
x
-
1.0
))
+
1.0
;
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
0.1
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
0.1
);
ASSERT_EQUAL_TOL
(
energy
,
state
.
getPotentialEnergy
(),
0.02
);
}
for
(
int
i
=
1
;
i
<
20
;
i
++
)
{
double
x
=
0.25
*
i
+
1.0
;
positions
[
1
]
=
Vec3
(
x
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
energy
=
(
x
<
1.0
||
x
>
6.0
?
0.0
:
std
::
sin
(
x
-
1.0
))
+
1.0
;
ASSERT_EQUAL_TOL
(
energy
,
state
.
getPotentialEnergy
(),
1e-4
);
}
}
void
testCoulombLennardJones
()
{
const
int
numMolecules
=
300
;
const
int
numParticles
=
numMolecules
*
2
;
const
double
boxSize
=
20.0
;
CudaPlatform
platform
;
// Create two systems: one with a NonbondedForce, and one using a CustomNonbondedForce to implement the same interaction.
System
standardSystem
;
System
customSystem
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
standardSystem
.
addParticle
(
1.0
);
customSystem
.
addParticle
(
1.0
);
}
NonbondedForce
*
standardNonbonded
=
new
NonbondedForce
();
CustomNonbondedForce
*
customNonbonded
=
new
CustomNonbondedForce
(
"4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)"
);
customNonbonded
->
addPerParticleParameter
(
"q"
);
customNonbonded
->
addPerParticleParameter
(
"sigma"
);
customNonbonded
->
addPerParticleParameter
(
"eps"
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
params
(
3
);
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
if
(
i
<
numMolecules
/
2
)
{
standardNonbonded
->
addParticle
(
1.0
,
0.2
,
0.1
);
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.1
;
customNonbonded
->
addParticle
(
params
);
standardNonbonded
->
addParticle
(
-
1.0
,
0.1
,
0.1
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
customNonbonded
->
addParticle
(
params
);
}
else
{
standardNonbonded
->
addParticle
(
1.0
,
0.2
,
0.2
);
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.2
;
customNonbonded
->
addParticle
(
params
);
standardNonbonded
->
addParticle
(
-
1.0
,
0.1
,
0.2
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
customNonbonded
->
addParticle
(
params
);
}
positions
[
2
*
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
positions
[
2
*
i
+
1
]
=
Vec3
(
positions
[
2
*
i
][
0
]
+
1.0
,
positions
[
2
*
i
][
1
],
positions
[
2
*
i
][
2
]);
velocities
[
2
*
i
]
=
Vec3
(
genrand_real2
(
sfmt
),
genrand_real2
(
sfmt
),
genrand_real2
(
sfmt
));
velocities
[
2
*
i
+
1
]
=
Vec3
(
genrand_real2
(
sfmt
),
genrand_real2
(
sfmt
),
genrand_real2
(
sfmt
));
standardNonbonded
->
addException
(
2
*
i
,
2
*
i
+
1
,
0.0
,
1.0
,
0.0
);
customNonbonded
->
addExclusion
(
2
*
i
,
2
*
i
+
1
);
}
standardNonbonded
->
setNonbondedMethod
(
NonbondedForce
::
NoCutoff
);
customNonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
NoCutoff
);
standardSystem
.
addForce
(
standardNonbonded
);
customSystem
.
addForce
(
customNonbonded
);
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
context1
(
standardSystem
,
integrator1
,
platform
);
Context
context2
(
customSystem
,
integrator2
,
platform
);
context1
.
setPositions
(
positions
);
context2
.
setPositions
(
positions
);
context1
.
setVelocities
(
velocities
);
context2
.
setVelocities
(
velocities
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-4
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-4
);
}
}
void
testParallelComputation
()
{
CudaPlatform
platform
;
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomNonbondedForce
*
force
=
new
CustomNonbondedForce
(
"4*eps*((sigma/r)^12-(sigma/r)^6); sigma=0.5; eps=1"
);
vector
<
double
>
params
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
force
->
addParticle
(
params
);
system
.
addForce
(
force
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
5
*
genrand_real2
(
sfmt
),
5
*
genrand_real2
(
sfmt
),
5
*
genrand_real2
(
sfmt
));
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
for
(
int
j
=
0
;
j
<
i
;
++
j
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
delta
.
dot
(
delta
)
<
0.1
)
force
->
addExclusion
(
i
,
j
);
}
VerletIntegrator
integrator1
(
0.01
);
Context
context1
(
system
,
integrator1
,
platform
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
string
deviceIndex
=
platform
.
getPropertyValue
(
context1
,
CudaPlatform
::
CudaDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
int
main
()
{
try
{
testSimpleExpression
();
testParameters
();
testManyParameters
();
testExclusions
();
testCutoff
();
testPeriodic
();
testTabulatedFunction
();
testCoulombLennardJones
();
// testParallelComputation();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment