Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59c809c0
Commit
59c809c0
authored
Jul 16, 2015
by
peastman
Browse files
Began overhaul of CUDA CustomIntegrator in preparation for supporting flow control
parent
44b96f0c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
230 additions
and
169 deletions
+230
-169
openmmapi/include/openmm/internal/CompiledExpressionSet.h
openmmapi/include/openmm/internal/CompiledExpressionSet.h
+7
-3
openmmapi/src/CompiledExpressionSet.cpp
openmmapi/src/CompiledExpressionSet.cpp
+6
-2
platforms/cpu/include/CpuCustomGBForce.h
platforms/cpu/include/CpuCustomGBForce.h
+1
-1
platforms/cpu/include/CpuCustomManyParticleForce.h
platforms/cpu/include/CpuCustomManyParticleForce.h
+1
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+32
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+181
-152
platforms/cuda/src/kernels/customIntegrator.cu
platforms/cuda/src/kernels/customIntegrator.cu
+2
-2
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+0
-6
No files found.
platforms/cpu/include
/CompiledExpressionSet.h
→
openmmapi/include/openmm/internal
/CompiledExpressionSet.h
View file @
59c809c0
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2014 Stanford University and the Authors.
*
* Portions copyright (c) 2014
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -33,7 +33,7 @@
...
@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "lepton/CompiledExpression.h"
#include "lepton/CompiledExpression.h"
#include "windowsExport
Cpu
.h"
#include "windowsExport.h"
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -42,7 +42,7 @@ namespace OpenMM {
...
@@ -42,7 +42,7 @@ namespace OpenMM {
/**
/**
* This class simplifies the management of a set of related CompiledExpressions that share variables.
* This class simplifies the management of a set of related CompiledExpressions that share variables.
*/
*/
class
OPENMM_EXPORT
_CPU
CompiledExpressionSet
{
class
OPENMM_EXPORT
CompiledExpressionSet
{
public:
public:
CompiledExpressionSet
();
CompiledExpressionSet
();
/**
/**
...
@@ -60,6 +60,10 @@ public:
...
@@ -60,6 +60,10 @@ public:
* @param value the value to set it to
* @param value the value to set it to
*/
*/
void
setVariable
(
int
index
,
double
value
);
void
setVariable
(
int
index
,
double
value
);
/**
* Get the total number of variables for which indices have been allocated.
*/
int
getNumVariables
()
const
;
private:
private:
std
::
vector
<
Lepton
::
CompiledExpression
*>
expressions
;
std
::
vector
<
Lepton
::
CompiledExpression
*>
expressions
;
std
::
vector
<
std
::
string
>
variables
;
std
::
vector
<
std
::
string
>
variables
;
...
...
platforms/cpu
/src/CompiledExpressionSet.cpp
→
openmmapi
/src/CompiledExpressionSet.cpp
View file @
59c809c0
/* Portions copyright (c) 2014 Stanford University and Simbios.
/* Portions copyright (c) 2014
-2015
Stanford University and Simbios.
* Contributors: Peter Eastman
* Contributors: Peter Eastman
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* Permission is hereby granted, free of charge, to any person obtaining
...
@@ -21,7 +21,7 @@
...
@@ -21,7 +21,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
*/
#include "CompiledExpressionSet.h"
#include "
openmm/internal/
CompiledExpressionSet.h"
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
Lepton
;
using
namespace
Lepton
;
...
@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) {
...
@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) {
for
(
int
i
=
0
;
i
<
(
int
)
variableReferences
[
index
].
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
variableReferences
[
index
].
size
();
i
++
)
*
variableReferences
[
index
][
i
]
=
value
;
*
variableReferences
[
index
][
i
]
=
value
;
}
}
int
CompiledExpressionSet
::
getNumVariables
()
const
{
return
variables
.
size
();
}
platforms/cpu/include/CpuCustomGBForce.h
View file @
59c809c0
...
@@ -25,10 +25,10 @@
...
@@ -25,10 +25,10 @@
#ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__
#ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__
#define OPENMM_CPU_CUSTOM_GB_FORCE_H__
#define OPENMM_CPU_CUSTOM_GB_FORCE_H__
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h"
#include "CpuNeighborList.h"
#include "lepton/CompiledExpression.h"
#include "lepton/CompiledExpression.h"
#include "openmm/CustomGBForce.h"
#include "openmm/CustomGBForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include "openmm/internal/vectorize.h"
#include <map>
#include <map>
...
...
platforms/cpu/include/CpuCustomManyParticleForce.h
View file @
59c809c0
...
@@ -27,9 +27,9 @@
...
@@ -27,9 +27,9 @@
#include "ReferenceForce.h"
#include "ReferenceForce.h"
#include "ReferenceBondIxn.h"
#include "ReferenceBondIxn.h"
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h"
#include "CpuNeighborList.h"
#include "openmm/CustomManyParticleForce.h"
#include "openmm/CustomManyParticleForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include "openmm/internal/vectorize.h"
#include "lepton/CompiledExpression.h"
#include "lepton/CompiledExpression.h"
...
...
platforms/cuda/include/CudaKernels.h
View file @
59c809c0
...
@@ -35,6 +35,9 @@
...
@@ -35,6 +35,9 @@
#include "CudaSort.h"
#include "CudaSort.h"
#include "openmm/kernels.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include <cufft.h>
#include <cufft.h>
namespace
OpenMM
{
namespace
OpenMM
{
...
@@ -1213,6 +1216,7 @@ private:
...
@@ -1213,6 +1216,7 @@ private:
*/
*/
class
CudaIntegrateCustomStepKernel
:
public
IntegrateCustomStepKernel
{
class
CudaIntegrateCustomStepKernel
:
public
IntegrateCustomStepKernel
{
public:
public:
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
CudaIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cu
(
cu
),
CudaIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
contextParameterValues
(
NULL
),
sumBuffer
(
NULL
),
potentialEnergy
(
NULL
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
contextParameterValues
(
NULL
),
sumBuffer
(
NULL
),
potentialEnergy
(
NULL
),
kineticEnergy
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDofValues
(
NULL
)
{
kineticEnergy
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDofValues
(
NULL
)
{
...
@@ -1279,15 +1283,17 @@ public:
...
@@ -1279,15 +1283,17 @@ public:
void
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
std
::
vector
<
Vec3
>&
values
);
void
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
std
::
vector
<
Vec3
>&
values
);
private:
private:
class
ReorderListener
;
class
ReorderListener
;
class
GlobalTarget
;
std
::
string
createGlobalComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
energyName
);
std
::
string
createGlobalComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordGlobalValue
(
double
value
,
GlobalTarget
target
);
void
recordChangedParameters
(
ContextImpl
&
context
);
void
recordChangedParameters
(
ContextImpl
&
context
);
CudaContext
&
cu
;
CudaContext
&
cu
;
double
prevStepSize
,
energy
;
double
prevStepSize
,
energy
;
float
energyFloat
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
CudaArray
*
globalValues
;
CudaArray
*
globalValues
;
CudaArray
*
contextParameterValues
;
CudaArray
*
contextParameterValues
;
...
@@ -1303,19 +1309,43 @@ private:
...
@@ -1303,19 +1309,43 @@ private:
mutable
std
::
vector
<
std
::
vector
<
double
>
>
localPerDofValuesDouble
;
mutable
std
::
vector
<
std
::
vector
<
double
>
>
localPerDofValuesDouble
;
std
::
vector
<
float
>
contextValuesFloat
;
std
::
vector
<
float
>
contextValuesFloat
;
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
float
>
globalValuesFloat
;
std
::
vector
<
double
>
globalValuesDouble
;
std
::
vector
<
double
>
initialGlobalVariables
;
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
CUfunction
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
CUfunction
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
CustomIntegratorUtilities
::
Comparison
>
comparisons
;
std
::
vector
<
std
::
vector
<
Lepton
::
CompiledExpression
>
>
globalExpressions
;
CompiledExpressionSet
expressionSet
;
std
::
vector
<
bool
>
needsGlobals
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
std
::
vector
<
bool
>
needsEnergy
;
std
::
vector
<
bool
>
computeBothForceAndEnergy
;
std
::
vector
<
bool
>
invalidatesForces
;
std
::
vector
<
bool
>
invalidatesForces
;
std
::
vector
<
bool
>
merged
;
std
::
vector
<
bool
>
merged
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
int
>
forceGroupFlags
;
std
::
vector
<
int
>
blockEnd
;
std
::
vector
<
int
>
requiredGaussian
;
std
::
vector
<
int
>
requiredGaussian
;
std
::
vector
<
int
>
requiredUniform
;
std
::
vector
<
int
>
requiredUniform
;
std
::
vector
<
int
>
stepEnergyVariableIndex
;
std
::
vector
<
int
>
globalVariableIndex
;
std
::
vector
<
int
>
parameterVariableIndex
;
int
gaussianVariableIndex
,
uniformVariableIndex
,
dtVariableIndex
;
std
::
vector
<
std
::
string
>
parameterNames
;
std
::
vector
<
std
::
string
>
parameterNames
;
std
::
vector
<
GlobalTarget
>
stepTarget
;
};
class
CudaIntegrateCustomStepKernel
::
GlobalTarget
{
public:
CudaIntegrateCustomStepKernel
::
GlobalTargetType
type
;
int
variableIndex
;
GlobalTarget
()
{
}
GlobalTarget
(
CudaIntegrateCustomStepKernel
::
GlobalTargetType
type
,
int
variableIndex
)
:
type
(
type
),
variableIndex
(
variableIndex
)
{
}
};
};
/**
/**
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
59c809c0
...
@@ -5696,7 +5696,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
...
@@ -5696,7 +5696,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
sumBuffer
=
new
CudaArray
(
cu
,
((
3
*
system
.
getNumParticles
()
+
3
)
/
4
)
*
4
,
elementSize
,
"sumBuffer"
);
sumBuffer
=
new
CudaArray
(
cu
,
((
3
*
system
.
getNumParticles
()
+
3
)
/
4
)
*
4
,
elementSize
,
"sumBuffer"
);
potentialEnergy
=
new
CudaArray
(
cu
,
1
,
cu
.
getEnergyBuffer
().
getElementSize
(),
"potentialEnergy"
);
potentialEnergy
=
new
CudaArray
(
cu
,
1
,
cu
.
getEnergyBuffer
().
getElementSize
(),
"potentialEnergy"
);
kineticEnergy
=
new
CudaArray
(
cu
,
1
,
elementSize
,
"kineticEnergy"
);
kineticEnergy
=
new
CudaArray
(
cu
,
1
,
elementSize
,
"kineticEnergy"
);
...
@@ -5764,11 +5763,11 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
...
@@ -5764,11 +5763,11 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
if
(
energyName
!=
""
)
if
(
energyName
!=
""
)
variables
[
energyName
]
=
"energy"
;
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
globalVariableIndex
[
i
]
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"
param
s["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"
global
s["
+
cu
.
intToString
(
parameterVariableIndex
[
i
]
)
+
"]"
;
vector
<
const
TabulatedFunction
*>
functions
;
vector
<
const
TabulatedFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
return
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
+
cu
.
intToString
(
component
)
+
"_"
,
"double"
);
return
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
+
cu
.
intToString
(
component
)
+
"_"
,
"double"
);
...
@@ -5808,10 +5807,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5808,10 +5807,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kernelArgs
.
resize
(
integrator
.
getNumComputations
());
kernelArgs
.
resize
(
integrator
.
getNumComputations
());
requiredGaussian
.
resize
(
integrator
.
getNumComputations
(),
0
);
requiredGaussian
.
resize
(
integrator
.
getNumComputations
(),
0
);
requiredUniform
.
resize
(
integrator
.
getNumComputations
(),
0
);
requiredUniform
.
resize
(
integrator
.
getNumComputations
(),
0
);
needs
Force
s
.
resize
(
numSteps
,
false
);
needs
Global
s
.
resize
(
numSteps
,
false
);
needsEnergy
.
resize
(
numSteps
,
false
);
globalExpressions
.
resize
(
numSteps
);
forceGroup
.
resize
(
numSteps
,
-
2
);
stepType
.
resize
(
numSteps
);
invalidatesForces
.
resize
(
numSteps
,
false
);
stepTarget
.
resize
(
numSteps
);
merged
.
resize
(
numSteps
,
false
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
modifiesParameters
=
false
;
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
...
@@ -5819,24 +5818,40 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5819,24 +5818,40 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
CudaContext
::
ThreadBlockSize
);
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
CudaContext
::
ThreadBlockSize
);
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Build a list of all variables that affect the forces, so we can tell which
// Record information about all the computation steps.
// steps invalidate them.
vector
<
string
>
variable
(
numSteps
);
set
<
string
>
affectsForce
;
vector
<
int
>
forceGroup
;
affectsForce
.
insert
(
"x"
);
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
expression
;
for
(
vector
<
ForceImpl
*>::
const_iterator
iter
=
context
.
getForceImpls
().
begin
();
iter
!=
context
.
getForceImpls
().
end
();
++
iter
)
{
CustomIntegratorUtilities
::
analyzeComputations
(
context
,
integrator
,
expression
,
comparisons
,
blockEnd
,
invalidatesForces
,
needsForces
,
needsEnergy
,
computeBothForceAndEnergy
,
forceGroup
);
const
map
<
string
,
double
>
params
=
(
*
iter
)
->
getDefaultParameters
();
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
map
<
string
,
double
>::
const_iterator
param
=
params
.
begin
();
param
!=
params
.
end
();
++
param
)
string
expr
;
affectsForce
.
insert
(
param
->
first
);
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
if
(
stepType
[
step
]
==
CustomIntegrator
::
BeginWhileBlock
)
blockEnd
[
blockEnd
[
step
]]
=
step
;
// Record where to branch back to.
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
||
stepType
[
step
]
==
CustomIntegrator
::
BeginIfBlock
||
stepType
[
step
]
==
CustomIntegrator
::
BeginWhileBlock
)
for
(
int
i
=
0
;
i
<
(
int
)
expression
[
step
].
size
();
i
++
)
globalExpressions
[
step
].
push_back
(
expression
[
step
][
i
].
createCompiledExpression
());
}
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
globalExpressions
[
step
].
size
();
i
++
)
expressionSet
.
registerExpression
(
globalExpressions
[
step
][
i
]);
}
}
// Record
information about all
the
c
omp
utation steps
.
// Record
the indices for variables in
the
C
omp
iledExpressionSet
.
stepType
.
resize
(
numSteps
);
gaussianVariableIndex
=
expressionSet
.
getVariableIndex
(
"gaussian"
);
vector
<
string
>
variable
(
numSteps
);
uniformVariableIndex
=
expressionSet
.
getVariableIndex
(
"uniform"
);
vector
<
Lepton
::
ParsedExpression
>
expression
(
numSteps
);
dtVariableIndex
=
expressionSet
.
getVariableIndex
(
"dt"
);
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
globalVariableIndex
.
push_back
(
expressionSet
.
getVariableIndex
(
integrator
.
getGlobalVariableName
(
i
)));
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
parameterVariableIndex
.
push_back
(
expressionSet
.
getVariableIndex
(
parameterNames
[
i
]));
// Record the variable names and flags for the force and energy in each step.
forceGroupFlags
.
resize
(
numSteps
,
-
1
);
vector
<
string
>
forceGroupName
;
vector
<
string
>
forceGroupName
;
vector
<
string
>
energyGroupName
;
vector
<
string
>
energyGroupName
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
@@ -5849,41 +5864,65 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5849,41 +5864,65 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
vector
<
string
>
forceName
(
numSteps
,
"f"
);
vector
<
string
>
forceName
(
numSteps
,
"f"
);
vector
<
string
>
energyName
(
numSteps
,
"energy"
);
vector
<
string
>
energyName
(
numSteps
,
"energy"
);
stepEnergyVariableIndex
.
resize
(
numSteps
,
expressionSet
.
getVariableIndex
(
"energy"
));
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
string
expr
;
if
(
needsForces
[
step
]
&&
forceGroup
[
step
]
>
-
1
)
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
forceName
[
step
]
=
forceGroupName
[
forceGroup
[
step
]];
if
(
expr
.
size
()
>
0
)
{
if
(
needsEnergy
[
step
]
&&
forceGroup
[
step
]
>
-
1
)
{
expression
[
step
]
=
Lepton
::
Parser
::
parse
(
expr
).
optimize
();
energyName
[
step
]
=
energyGroupName
[
forceGroup
[
step
]];
if
(
usesVariable
(
expression
[
step
],
"f"
))
{
stepEnergyVariableIndex
[
step
]
=
expressionSet
.
getVariableIndex
(
energyName
[
step
]);
needsForces
[
step
]
=
true
;
}
forceGroup
[
step
]
=
-
1
;
if
(
forceGroup
[
step
]
>
-
1
)
}
forceGroupFlags
[
step
]
=
1
<<
forceGroup
[
step
];
if
(
usesVariable
(
expression
[
step
],
"energy"
))
{
if
(
forceGroupFlags
[
step
]
==
-
2
&&
step
>
0
)
needsEnergy
[
step
]
=
true
;
forceGroupFlags
[
step
]
=
forceGroupFlags
[
step
-
1
];
forceGroup
[
step
]
=
-
1
;
if
(
forceGroupFlags
[
step
]
!=
-
2
&&
savedForces
.
find
(
forceGroupFlags
[
step
])
==
savedForces
.
end
())
}
savedForces
[
forceGroupFlags
[
step
]]
=
new
CudaArray
(
cu
,
cu
.
getForce
().
getSize
(),
cu
.
getForce
().
getElementSize
(),
"savedForces"
);
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
}
if
(
usesVariable
(
expression
[
step
],
forceGroupName
[
i
]))
{
if
(
forceGroup
[
step
]
!=
-
2
)
// Allocate space for storing global values, both on the host and the device.
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
needsForces
[
step
]
=
true
;
globalValuesFloat
.
resize
(
expressionSet
.
getNumVariables
());
forceGroup
[
step
]
=
1
<<
i
;
globalValuesDouble
.
resize
(
expressionSet
.
getNumVariables
());
forceName
[
step
]
=
forceGroupName
[
i
];
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
}
globalValues
=
new
CudaArray
(
cu
,
expressionSet
.
getNumVariables
(),
elementSize
,
"globalValues"
);
if
(
usesVariable
(
expression
[
step
],
energyGroupName
[
i
]))
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
{
if
(
forceGroup
[
step
]
!=
-
2
)
globalValuesDouble
[
globalVariableIndex
[
i
]]
=
initialGlobalVariables
[
i
];
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
expressionSet
.
setVariable
(
globalVariableIndex
[
i
],
initialGlobalVariables
[
i
]);
needsEnergy
[
step
]
=
true
;
}
forceGroup
[
step
]
=
1
<<
i
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterVariableIndex
.
size
();
i
++
)
{
energyName
[
step
]
=
energyGroupName
[
i
];
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
}
globalValuesDouble
[
parameterVariableIndex
[
i
]]
=
value
;
}
expressionSet
.
setVariable
(
parameterVariableIndex
[
i
],
value
);
}
// Record information about the targets of steps that will be stored in global variables.
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
||
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
{
if
(
variable
[
step
]
==
"dt"
)
stepTarget
[
step
].
type
=
DT
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
i
))
stepTarget
[
step
].
type
=
VARIABLE
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
variable
[
step
]
==
parameterNames
[
i
])
stepTarget
[
step
].
type
=
PARAMETER
;
stepTarget
[
step
].
variableIndex
=
expressionSet
.
getVariableIndex
(
variable
[
step
]);
}
}
// Identify which per-DOF steps are going to require global variables or context parameters.
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
||
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
usesVariable
(
expression
[
step
][
0
],
integrator
.
getGlobalVariableName
(
i
)))
needsGlobals
[
step
]
=
true
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
usesVariable
(
expression
[
step
][
0
],
parameterNames
[
i
]))
needsGlobals
[
step
]
=
true
;
}
}
invalidatesForces
[
step
]
=
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
||
affectsForce
.
find
(
variable
[
step
])
!=
affectsForce
.
end
());
if
(
forceGroup
[
step
]
==
-
2
&&
step
>
0
)
forceGroup
[
step
]
=
forceGroup
[
step
-
1
];
if
(
forceGroup
[
step
]
!=
-
2
&&
savedForces
.
find
(
forceGroup
[
step
])
==
savedForces
.
end
())
savedForces
[
forceGroup
[
step
]]
=
new
CudaArray
(
cu
,
cu
.
getForce
().
getSize
(),
cu
.
getForce
().
getElementSize
(),
"savedForces"
);
}
}
// Determine how each step will represent the position (as just a value, or a value plus a delta).
// Determine how each step will represent the position (as just a value, or a value plus a delta).
...
@@ -5911,9 +5950,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5911,9 +5950,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
usesVariable
(
expression
[
step
],
"uniform"
)
&&
!
usesVariable
(
expression
[
step
],
"gaussian"
))
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
merged
[
step
]
=
true
;
merged
[
step
]
=
true
;
}
}
...
@@ -5933,15 +5969,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -5933,15 +5969,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
}
int
numGaussian
=
0
,
numUniform
=
0
;
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
]
[
0
]
,
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
]
[
0
]
,
"uniform"
);
compute
<<
"{
\n
"
;
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
if
(
numUniform
>
0
)
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
]
[
0
]
,
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
variable
[
j
]
==
"x"
)
{
if
(
storePosAsDelta
[
j
])
if
(
storePosAsDelta
[
j
])
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));
\n
"
;
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));
\n
"
;
...
@@ -6010,14 +6046,12 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6010,14 +6046,12 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
bool
found
=
false
;
bool
found
=
false
;
for
(
int
j
=
0
;
j
<
integrator
.
getNumGlobalVariables
()
&&
!
found
;
j
++
)
for
(
int
j
=
0
;
j
<
integrator
.
getNumGlobalVariables
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
j
))
{
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
j
))
{
args2
.
push_back
(
&
globalValues
->
getDevicePointer
());
args2
.
push_back
(
&
kineticEnergy
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
found
=
true
;
found
=
true
;
}
}
for
(
int
j
=
0
;
j
<
(
int
)
parameterNames
.
size
()
&&
!
found
;
j
++
)
for
(
int
j
=
0
;
j
<
(
int
)
parameterNames
.
size
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
parameterNames
[
j
])
{
if
(
variable
[
step
]
==
parameterNames
[
j
])
{
args2
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args2
.
push_back
(
&
kineticEnergy
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
found
=
true
;
found
=
true
;
modifiesParameters
=
true
;
modifiesParameters
=
true
;
}
}
...
@@ -6035,7 +6069,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6035,7 +6069,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stringstream
compute
;
stringstream
compute
;
for
(
int
i
=
step
;
i
<
numSteps
&&
(
i
==
step
||
merged
[
i
]);
i
++
)
for
(
int
i
=
step
;
i
<
numSteps
&&
(
i
==
step
||
merged
[
i
]);
i
++
)
compute
<<
"{
\n
"
<<
createGlobalComputation
(
variable
[
i
],
expression
[
i
],
integrator
,
energyName
[
i
])
<<
"}
\n
"
;
compute
<<
"{
\n
"
<<
createGlobalComputation
(
variable
[
i
],
expression
[
i
]
[
0
]
,
integrator
,
energyName
[
i
])
<<
"}
\n
"
;
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorGlobal
,
replacements
),
defines
);
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorGlobal
,
replacements
),
defines
);
...
@@ -6111,7 +6145,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6111,7 +6145,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
...
@@ -6144,7 +6177,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6144,7 +6177,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
sumKineticEnergyKernel
=
cu
.
getKernel
(
module
,
useDouble
?
"computeDoubleSum"
:
"computeFloatSum"
);
sumKineticEnergyKernel
=
cu
.
getKernel
(
module
,
useDouble
?
"computeDoubleSum"
:
"computeFloatSum"
);
}
}
// Make sure all values (variables, parameters, etc.)
stored on the device
are up to date.
// Make sure all values (variables, parameters, etc.) are up to date.
if
(
!
deviceValuesAreCurrent
)
{
if
(
!
deviceValuesAreCurrent
)
{
if
(
useDouble
)
if
(
useDouble
)
...
@@ -6156,38 +6189,14 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6156,38 +6189,14 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
localValuesAreCurrent
=
false
;
localValuesAreCurrent
=
false
;
double
stepSize
=
integrator
.
getStepSize
();
double
stepSize
=
integrator
.
getStepSize
();
if
(
stepSize
!=
prevStepSize
)
{
if
(
stepSize
!=
prevStepSize
)
{
if
(
useDouble
)
{
recordGlobalValue
(
stepSize
,
GlobalTarget
(
DT
,
dtVariableIndex
));
double
size
[]
=
{
0
,
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
else
{
float
size
[]
=
{
0
,
(
float
)
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
prevStepSize
=
stepSize
;
}
}
bool
paramsChanged
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
if
(
useDouble
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
if
(
value
!=
globalValuesDouble
[
parameterVariableIndex
[
i
]])
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
globalValuesDouble
[
parameterVariableIndex
[
i
]]
=
value
;
if
(
value
!=
contextValuesDouble
[
i
])
{
deviceGlobalsAreCurrent
=
false
;
contextValuesDouble
[
i
]
=
value
;
paramsChanged
=
true
;
}
}
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesDouble
);
}
else
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesFloat
[
i
])
{
contextValuesFloat
[
i
]
=
value
;
paramsChanged
=
true
;
}
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesFloat
);
}
}
}
}
...
@@ -6204,7 +6213,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6204,7 +6213,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
int
lastForceGroups
=
context
.
getLastForceGroups
();
int
lastForceGroups
=
context
.
getLastForceGroups
();
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
lastForceGroups
!=
forceGroup
[
i
]))
{
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
lastForceGroups
!=
forceGroup
Flags
[
i
]))
{
if
(
forcesAreValid
&&
savedForces
.
find
(
lastForceGroups
)
!=
savedForces
.
end
())
{
if
(
forcesAreValid
&&
savedForces
.
find
(
lastForceGroups
)
!=
savedForces
.
end
())
{
// The forces are still valid. We just need a different force group right now. Save the old
// The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again.
// forces in case we need them again.
...
@@ -6218,31 +6227,31 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6218,31 +6227,31 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Recompute forces and/or energy. Figure out what is actually needed
// Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again.
// between now and the next time they get invalidated again.
bool
computeForce
=
false
,
computeEnergy
=
false
;
bool
computeForce
=
(
needsForces
[
i
]
||
computeBothForceAndEnergy
[
i
]);
for
(
int
j
=
i
;
;
j
++
)
{
bool
computeEnergy
=
(
needsEnergy
[
i
]
||
computeBothForceAndEnergy
[
i
]);
if
(
needsForces
[
j
])
if
(
!
computeEnergy
&&
validSavedForces
.
find
(
forceGroupFlags
[
i
])
!=
validSavedForces
.
end
())
{
computeForce
=
true
;
if
(
needsEnergy
[
j
])
computeEnergy
=
true
;
if
(
invalidatesForces
[
j
])
break
;
if
(
j
==
numSteps
-
1
)
j
=
-
1
;
if
(
j
==
i
-
1
)
break
;
}
if
(
!
computeEnergy
&&
validSavedForces
.
find
(
forceGroup
[
i
])
!=
validSavedForces
.
end
())
{
// We can just restore the forces we saved earlier.
// We can just restore the forces we saved earlier.
savedForces
[
forceGroup
[
i
]]
->
copyTo
(
cu
.
getForce
());
savedForces
[
forceGroup
Flags
[
i
]]
->
copyTo
(
cu
.
getForce
());
}
}
else
{
else
{
recordChangedParameters
(
context
);
recordChangedParameters
(
context
);
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
Flags
[
i
]);
energyFloat
=
(
float
)
energy
;
energyFloat
=
(
float
)
energy
;
}
}
forcesAreValid
=
true
;
forcesAreValid
=
true
;
}
}
if
(
needsGlobals
[
i
]
&&
!
deviceGlobalsAreCurrent
)
{
// Upload the global values to the device.
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
globalValues
->
upload
(
globalValuesDouble
);
else
{
for
(
int
j
=
0
;
j
<
(
int
)
globalValuesDouble
.
size
();
j
++
)
globalValuesFloat
[
j
]
=
(
float
)
globalValuesDouble
[
j
];
globalValues
->
upload
(
globalValuesFloat
);
}
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
...
@@ -6253,12 +6262,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6253,12 +6262,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
merged
[
i
])
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeGlobal
)
{
float
uniform
=
SimTKOpenMMUtilities
::
getUniformlyDistributedRandomNumber
();
expressionSet
.
setVariable
(
uniformVariableIndex
,
SimTKOpenMMUtilities
::
getUniformlyDistributedRandomNumber
());
float
gauss
=
SimTKOpenMMUtilities
::
getNormallyDistributedRandomNumber
();
expressionSet
.
setVariable
(
gaussianVariableIndex
,
SimTKOpenMMUtilities
::
getNormallyDistributedRandomNumber
());
kernelArgs
[
i
][
0
][
3
]
=
&
uniform
;
expressionSet
.
setVariable
(
stepEnergyVariableIndex
[
i
],
energy
);
kernelArgs
[
i
][
0
][
4
]
=
&
gauss
;
recordGlobalValue
(
globalExpressions
[
i
][
0
].
evaluate
(),
stepTarget
[
i
]);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
1
,
1
);
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
...
@@ -6271,6 +6279,16 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6271,6 +6279,16 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
1
],
&
kernelArgs
[
i
][
1
][
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
cu
.
executeKernel
(
kernels
[
i
][
1
],
&
kernelArgs
[
i
][
1
][
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
value
;
kineticEnergy
->
download
(
&
value
);
globalValuesDouble
[
stepTarget
[
i
].
variableIndex
]
=
value
;
}
else
{
float
value
;
kineticEnergy
->
download
(
&
value
);
globalValuesDouble
[
stepTarget
[
i
].
variableIndex
]
=
value
;
}
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
UpdateContextState
)
{
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
UpdateContextState
)
{
recordChangedParameters
(
context
);
recordChangedParameters
(
context
);
...
@@ -6335,52 +6353,63 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
...
@@ -6335,52 +6353,63 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
}
}
}
}
void
CudaIntegrateCustomStepKernel
::
recordGlobalValue
(
double
value
,
GlobalTarget
target
)
{
switch
(
target
.
type
)
{
case
DT
:
globalValuesDouble
[
dtVariableIndex
]
=
value
;
deviceGlobalsAreCurrent
=
false
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
size
[]
=
{
0
,
value
};
cu
.
getIntegrationUtilities
().
getStepSize
().
upload
(
size
);
}
else
{
float
size
[]
=
{
0
,
(
float
)
value
};
cu
.
getIntegrationUtilities
().
getStepSize
().
upload
(
size
);
}
prevStepSize
=
value
;
break
;
case
VARIABLE
:
case
PARAMETER
:
expressionSet
.
setVariable
(
target
.
variableIndex
,
value
);
globalValuesDouble
[
target
.
variableIndex
]
=
value
;
deviceGlobalsAreCurrent
=
false
;
break
;
}
}
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
if
(
!
modifiesParameters
)
if
(
!
modifiesParameters
)
return
;
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
contextParameterValues
->
download
(
contextValuesDouble
);
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
if
(
value
!=
globalValuesDouble
[
parameterVariableIndex
[
i
]])
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
context
.
setParameter
(
parameterNames
[
i
],
globalValuesDouble
[
parameterVariableIndex
[
i
]]);
if
(
value
!=
contextValuesDouble
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesDouble
[
i
]);
}
}
else
{
contextParameterValues
->
download
(
contextValuesFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesFloat
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesFloat
[
i
]);
}
}
}
}
}
void
CudaIntegrateCustomStepKernel
::
getGlobalVariables
(
ContextImpl
&
context
,
vector
<
double
>&
values
)
const
{
void
CudaIntegrateCustomStepKernel
::
getGlobalVariables
(
ContextImpl
&
context
,
vector
<
double
>&
values
)
const
{
values
.
resize
(
numGlobalVariables
);
if
(
globalValues
==
NULL
)
{
if
(
numGlobalVariables
==
0
)
// The data structures haven't been created yet, so just return the list of values that was given earlier.
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
values
=
initialGlobalVariables
;
globalValues
->
download
(
values
);
else
{
vector
<
float
>
buffer
;
globalValues
->
download
(
buffer
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
values
[
i
]
=
buffer
[
i
];
}
}
values
.
resize
(
numGlobalVariables
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
values
[
i
]
=
globalValuesDouble
[
globalVariableIndex
[
i
]];
}
}
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
if
(
numGlobalVariables
==
0
)
if
(
numGlobalVariables
==
0
)
return
;
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
if
(
globalValues
==
NULL
)
{
globalValues
->
upload
(
values
);
// The data structures haven't been created yet, so just store the list of values.
else
{
vector
<
float
>
buffer
(
numGlobalVariables
);
initialGlobalVariables
=
values
;
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
return
;
buffer
[
i
]
=
(
float
)
values
[
i
];
globalValues
->
upload
(
buffer
);
}
}
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
globalValuesDouble
[
globalVariableIndex
[
i
]]
=
values
[
i
];
deviceGlobalsAreCurrent
=
false
;
}
}
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
...
...
platforms/cuda/src/kernels/customIntegrator.cu
View file @
59c809c0
...
@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer,
...
@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer,
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
}
if
(
thread
==
0
)
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
*
result
=
tempBuffer
[
0
];
}
}
extern
"C"
__global__
void
computeDoubleSum
(
const
double
*
__restrict__
sumBuffer
,
double
*
result
)
{
extern
"C"
__global__
void
computeDoubleSum
(
const
double
*
__restrict__
sumBuffer
,
double
*
result
)
{
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
}
if
(
thread
==
0
)
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
*
result
=
tempBuffer
[
0
];
}
}
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
59c809c0
...
@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
...
@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed4
xpj2
=
make_mixed4
(
0
);
mixed4
xpj2
=
make_mixed4
(
0
);
float
invMassCentral
=
params
.
x
;
float
invMassCentral
=
params
.
x
;
float
avgMass
=
params
.
y
;
float
avgMass
=
params
.
y
;
float
d2
=
params
.
z
;
float
invMassPeripheral
=
params
.
w
;
float
invMassPeripheral
=
params
.
w
;
if
(
atoms
.
z
!=
-
1
)
{
if
(
atoms
.
z
!=
-
1
)
{
pos2
=
loadPos
(
oldPos
,
posCorrection
,
atoms
.
z
);
pos2
=
loadPos
(
oldPos
,
posCorrection
,
atoms
.
z
);
...
@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
...
@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed
rij1sq
=
rij1
.
x
*
rij1
.
x
+
rij1
.
y
*
rij1
.
y
+
rij1
.
z
*
rij1
.
z
;
mixed
rij1sq
=
rij1
.
x
*
rij1
.
x
+
rij1
.
y
*
rij1
.
y
+
rij1
.
z
*
rij1
.
z
;
mixed
rij2sq
=
rij2
.
x
*
rij2
.
x
+
rij2
.
y
*
rij2
.
y
+
rij2
.
z
*
rij2
.
z
;
mixed
rij2sq
=
rij2
.
x
*
rij2
.
x
+
rij2
.
y
*
rij2
.
y
+
rij2
.
z
*
rij2
.
z
;
mixed
rij3sq
=
rij3
.
x
*
rij3
.
x
+
rij3
.
y
*
rij3
.
y
+
rij3
.
z
*
rij3
.
z
;
mixed
rij3sq
=
rij3
.
x
*
rij3
.
x
+
rij3
.
y
*
rij3
.
y
+
rij3
.
z
*
rij3
.
z
;
mixed
ld1
=
d2
-
rij1sq
;
mixed
ld2
=
d2
-
rij2sq
;
mixed
ld3
=
d2
-
rij3sq
;
// Iterate until convergence.
// Iterate until convergence.
...
@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest
...
@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
groupConverged
=
1
;
groupConverged
=
1
;
__syncthreads
();
__syncthreads
();
mixed
lowerTol
=
1
-
2
*
tol
+
tol
*
tol
;
mixed
upperTol
=
1
+
2
*
tol
+
tol
*
tol
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_CCMA_CONSTRAINTS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_CCMA_CONSTRAINTS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Compute the force due to this constraint.
// Compute the force due to this constraint.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment