Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59c809c0
Commit
59c809c0
authored
Jul 16, 2015
by
peastman
Browse files
Began overhaul of CUDA CustomIntegrator in preparation for supporting flow control
parent
44b96f0c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
230 additions
and
169 deletions
+230
-169
openmmapi/include/openmm/internal/CompiledExpressionSet.h
openmmapi/include/openmm/internal/CompiledExpressionSet.h
+7
-3
openmmapi/src/CompiledExpressionSet.cpp
openmmapi/src/CompiledExpressionSet.cpp
+6
-2
platforms/cpu/include/CpuCustomGBForce.h
platforms/cpu/include/CpuCustomGBForce.h
+1
-1
platforms/cpu/include/CpuCustomManyParticleForce.h
platforms/cpu/include/CpuCustomManyParticleForce.h
+1
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+32
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+181
-152
platforms/cuda/src/kernels/customIntegrator.cu
platforms/cuda/src/kernels/customIntegrator.cu
+2
-2
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+0
-6
No files found.
platforms/cpu/include
/CompiledExpressionSet.h
→
openmmapi/include/openmm/internal
/CompiledExpressionSet.h
View file @
59c809c0
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors.
*
* Portions copyright (c) 2014
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -33,7 +33,7 @@
* -------------------------------------------------------------------------- */
#include "lepton/CompiledExpression.h"
#include "windowsExport
Cpu
.h"
#include "windowsExport.h"
#include <string>
#include <vector>
...
...
@@ -42,7 +42,7 @@ namespace OpenMM {
/**
* This class simplifies the management of a set of related CompiledExpressions that share variables.
*/
class
OPENMM_EXPORT
_CPU
CompiledExpressionSet
{
class
OPENMM_EXPORT
CompiledExpressionSet
{
public:
CompiledExpressionSet
();
/**
...
...
@@ -60,6 +60,10 @@ public:
* @param value the value to set it to
*/
void
setVariable
(
int
index
,
double
value
);
/**
* Get the total number of variables for which indices have been allocated.
*/
int
getNumVariables
()
const
;
private:
std
::
vector
<
Lepton
::
CompiledExpression
*>
expressions
;
std
::
vector
<
std
::
string
>
variables
;
...
...
platforms/cpu
/src/CompiledExpressionSet.cpp
→
openmmapi
/src/CompiledExpressionSet.cpp
View file @
59c809c0
/* Portions copyright (c) 2014 Stanford University and Simbios.
/* Portions copyright (c) 2014
-2015
Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
...
...
@@ -21,7 +21,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CompiledExpressionSet.h"
#include "
openmm/internal/
CompiledExpressionSet.h"
using
namespace
OpenMM
;
using
namespace
Lepton
;
...
...
@@ -54,3 +54,7 @@ void CompiledExpressionSet::setVariable(int index, double value) {
for
(
int
i
=
0
;
i
<
(
int
)
variableReferences
[
index
].
size
();
i
++
)
*
variableReferences
[
index
][
i
]
=
value
;
}
int
CompiledExpressionSet
::
getNumVariables
()
const
{
return
variables
.
size
();
}
platforms/cpu/include/CpuCustomGBForce.h
View file @
59c809c0
...
...
@@ -25,10 +25,10 @@
#ifndef OPENMM_CPU_CUSTOM_GB_FORCE_H__
#define OPENMM_CPU_CUSTOM_GB_FORCE_H__
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h"
#include "lepton/CompiledExpression.h"
#include "openmm/CustomGBForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include <map>
...
...
platforms/cpu/include/CpuCustomManyParticleForce.h
View file @
59c809c0
...
...
@@ -27,9 +27,9 @@
#include "ReferenceForce.h"
#include "ReferenceBondIxn.h"
#include "CompiledExpressionSet.h"
#include "CpuNeighborList.h"
#include "openmm/CustomManyParticleForce.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include "lepton/CompiledExpression.h"
...
...
platforms/cuda/include/CudaKernels.h
View file @
59c809c0
...
...
@@ -35,6 +35,9 @@
#include "CudaSort.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include <cufft.h>
namespace
OpenMM
{
...
...
@@ -1213,6 +1216,7 @@ private:
*/
class
CudaIntegrateCustomStepKernel
:
public
IntegrateCustomStepKernel
{
public:
enum
GlobalTargetType
{
DT
,
VARIABLE
,
PARAMETER
};
CudaIntegrateCustomStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateCustomStepKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedKernels
(
false
),
localValuesAreCurrent
(
false
),
globalValues
(
NULL
),
contextParameterValues
(
NULL
),
sumBuffer
(
NULL
),
potentialEnergy
(
NULL
),
kineticEnergy
(
NULL
),
uniformRandoms
(
NULL
),
randomSeed
(
NULL
),
perDofValues
(
NULL
)
{
...
...
@@ -1279,15 +1283,17 @@ public:
void
setPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
const
std
::
vector
<
Vec3
>&
values
);
private:
class
ReorderListener
;
class
GlobalTarget
;
std
::
string
createGlobalComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordGlobalValue
(
double
value
,
GlobalTarget
target
);
void
recordChangedParameters
(
ContextImpl
&
context
);
CudaContext
&
cu
;
double
prevStepSize
,
energy
;
float
energyFloat
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
CudaArray
*
globalValues
;
CudaArray
*
contextParameterValues
;
...
...
@@ -1303,19 +1309,43 @@ private:
mutable
std
::
vector
<
std
::
vector
<
double
>
>
localPerDofValuesDouble
;
std
::
vector
<
float
>
contextValuesFloat
;
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
float
>
globalValuesFloat
;
std
::
vector
<
double
>
globalValuesDouble
;
std
::
vector
<
double
>
initialGlobalVariables
;
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
CUfunction
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
CustomIntegratorUtilities
::
Comparison
>
comparisons
;
std
::
vector
<
std
::
vector
<
Lepton
::
CompiledExpression
>
>
globalExpressions
;
CompiledExpressionSet
expressionSet
;
std
::
vector
<
bool
>
needsGlobals
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
std
::
vector
<
bool
>
computeBothForceAndEnergy
;
std
::
vector
<
bool
>
invalidatesForces
;
std
::
vector
<
bool
>
merged
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
int
>
forceGroupFlags
;
std
::
vector
<
int
>
blockEnd
;
std
::
vector
<
int
>
requiredGaussian
;
std
::
vector
<
int
>
requiredUniform
;
std
::
vector
<
int
>
stepEnergyVariableIndex
;
std
::
vector
<
int
>
globalVariableIndex
;
std
::
vector
<
int
>
parameterVariableIndex
;
int
gaussianVariableIndex
,
uniformVariableIndex
,
dtVariableIndex
;
std
::
vector
<
std
::
string
>
parameterNames
;
std
::
vector
<
GlobalTarget
>
stepTarget
;
};
class
CudaIntegrateCustomStepKernel
::
GlobalTarget
{
public:
CudaIntegrateCustomStepKernel
::
GlobalTargetType
type
;
int
variableIndex
;
GlobalTarget
()
{
}
GlobalTarget
(
CudaIntegrateCustomStepKernel
::
GlobalTargetType
type
,
int
variableIndex
)
:
type
(
type
),
variableIndex
(
variableIndex
)
{
}
};
/**
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
59c809c0
...
...
@@ -5696,7 +5696,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
max
(
1
,
numGlobalVariables
),
elementSize
,
"globalVariables"
);
sumBuffer
=
new
CudaArray
(
cu
,
((
3
*
system
.
getNumParticles
()
+
3
)
/
4
)
*
4
,
elementSize
,
"sumBuffer"
);
potentialEnergy
=
new
CudaArray
(
cu
,
1
,
cu
.
getEnergyBuffer
().
getElementSize
(),
"potentialEnergy"
);
kineticEnergy
=
new
CudaArray
(
cu
,
1
,
elementSize
,
"kineticEnergy"
);
...
...
@@ -5764,11 +5763,11 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
if
(
energyName
!=
""
)
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
globalVariableIndex
[
i
]
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
variables
[
integrator
.
getPerDofVariableName
(
i
)]
=
"perDof"
+
suffix
.
substr
(
1
)
+
perDofValues
->
getParameterSuffix
(
i
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
variables
[
parameterNames
[
i
]]
=
"
param
s["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
parameterNames
[
i
]]
=
"
global
s["
+
cu
.
intToString
(
parameterVariableIndex
[
i
]
)
+
"]"
;
vector
<
const
TabulatedFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
return
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
+
cu
.
intToString
(
component
)
+
"_"
,
"double"
);
...
...
@@ -5808,10 +5807,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kernelArgs
.
resize
(
integrator
.
getNumComputations
());
requiredGaussian
.
resize
(
integrator
.
getNumComputations
(),
0
);
requiredUniform
.
resize
(
integrator
.
getNumComputations
(),
0
);
needs
Force
s
.
resize
(
numSteps
,
false
);
needsEnergy
.
resize
(
numSteps
,
false
);
forceGroup
.
resize
(
numSteps
,
-
2
);
invalidatesForces
.
resize
(
numSteps
,
false
);
needs
Global
s
.
resize
(
numSteps
,
false
);
globalExpressions
.
resize
(
numSteps
);
stepType
.
resize
(
numSteps
);
stepTarget
.
resize
(
numSteps
);
merged
.
resize
(
numSteps
,
false
);
modifiesParameters
=
false
;
map
<
string
,
string
>
defines
;
...
...
@@ -5819,24 +5818,40 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"WORK_GROUP_SIZE"
]
=
cu
.
intToString
(
CudaContext
::
ThreadBlockSize
);
defines
[
"SUM_BUFFER_SIZE"
]
=
"0"
;
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
// Build a list of all variables that affect the forces, so we can tell which
// steps invalidate them.
set
<
string
>
affectsForce
;
affectsForce
.
insert
(
"x"
);
for
(
vector
<
ForceImpl
*>::
const_iterator
iter
=
context
.
getForceImpls
().
begin
();
iter
!=
context
.
getForceImpls
().
end
();
++
iter
)
{
const
map
<
string
,
double
>
params
=
(
*
iter
)
->
getDefaultParameters
();
for
(
map
<
string
,
double
>::
const_iterator
param
=
params
.
begin
();
param
!=
params
.
end
();
++
param
)
affectsForce
.
insert
(
param
->
first
);
// Record information about all the computation steps.
vector
<
string
>
variable
(
numSteps
);
vector
<
int
>
forceGroup
;
vector
<
vector
<
Lepton
::
ParsedExpression
>
>
expression
;
CustomIntegratorUtilities
::
analyzeComputations
(
context
,
integrator
,
expression
,
comparisons
,
blockEnd
,
invalidatesForces
,
needsForces
,
needsEnergy
,
computeBothForceAndEnergy
,
forceGroup
);
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
string
expr
;
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
if
(
stepType
[
step
]
==
CustomIntegrator
::
BeginWhileBlock
)
blockEnd
[
blockEnd
[
step
]]
=
step
;
// Record where to branch back to.
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
||
stepType
[
step
]
==
CustomIntegrator
::
BeginIfBlock
||
stepType
[
step
]
==
CustomIntegrator
::
BeginWhileBlock
)
for
(
int
i
=
0
;
i
<
(
int
)
expression
[
step
].
size
();
i
++
)
globalExpressions
[
step
].
push_back
(
expression
[
step
][
i
].
createCompiledExpression
());
}
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
globalExpressions
[
step
].
size
();
i
++
)
expressionSet
.
registerExpression
(
globalExpressions
[
step
][
i
]);
}
// Record
information about all
the
c
omp
utation steps
.
// Record
the indices for variables in
the
C
omp
iledExpressionSet
.
stepType
.
resize
(
numSteps
);
vector
<
string
>
variable
(
numSteps
);
vector
<
Lepton
::
ParsedExpression
>
expression
(
numSteps
);
gaussianVariableIndex
=
expressionSet
.
getVariableIndex
(
"gaussian"
);
uniformVariableIndex
=
expressionSet
.
getVariableIndex
(
"uniform"
);
dtVariableIndex
=
expressionSet
.
getVariableIndex
(
"dt"
);
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
globalVariableIndex
.
push_back
(
expressionSet
.
getVariableIndex
(
integrator
.
getGlobalVariableName
(
i
)));
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
parameterVariableIndex
.
push_back
(
expressionSet
.
getVariableIndex
(
parameterNames
[
i
]));
// Record the variable names and flags for the force and energy in each step.
forceGroupFlags
.
resize
(
numSteps
,
-
1
);
vector
<
string
>
forceGroupName
;
vector
<
string
>
energyGroupName
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
@@ -5849,41 +5864,65 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
vector
<
string
>
forceName
(
numSteps
,
"f"
);
vector
<
string
>
energyName
(
numSteps
,
"energy"
);
stepEnergyVariableIndex
.
resize
(
numSteps
,
expressionSet
.
getVariableIndex
(
"energy"
));
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
string
expr
;
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
if
(
expr
.
size
()
>
0
)
{
expression
[
step
]
=
Lepton
::
Parser
::
parse
(
expr
).
optimize
();
if
(
usesVariable
(
expression
[
step
],
"f"
))
{
needsForces
[
step
]
=
true
;
forceGroup
[
step
]
=
-
1
;
}
if
(
usesVariable
(
expression
[
step
],
"energy"
))
{
needsEnergy
[
step
]
=
true
;
forceGroup
[
step
]
=
-
1
;
}
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
if
(
usesVariable
(
expression
[
step
],
forceGroupName
[
i
]))
{
if
(
forceGroup
[
step
]
!=
-
2
)
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
needsForces
[
step
]
=
true
;
forceGroup
[
step
]
=
1
<<
i
;
forceName
[
step
]
=
forceGroupName
[
i
];
}
if
(
usesVariable
(
expression
[
step
],
energyGroupName
[
i
]))
{
if
(
forceGroup
[
step
]
!=
-
2
)
throw
OpenMMException
(
"A single computation step cannot depend on multiple force groups"
);
needsEnergy
[
step
]
=
true
;
forceGroup
[
step
]
=
1
<<
i
;
energyName
[
step
]
=
energyGroupName
[
i
];
}
}
if
(
needsForces
[
step
]
&&
forceGroup
[
step
]
>
-
1
)
forceName
[
step
]
=
forceGroupName
[
forceGroup
[
step
]];
if
(
needsEnergy
[
step
]
&&
forceGroup
[
step
]
>
-
1
)
{
energyName
[
step
]
=
energyGroupName
[
forceGroup
[
step
]];
stepEnergyVariableIndex
[
step
]
=
expressionSet
.
getVariableIndex
(
energyName
[
step
]);
}
if
(
forceGroup
[
step
]
>
-
1
)
forceGroupFlags
[
step
]
=
1
<<
forceGroup
[
step
];
if
(
forceGroupFlags
[
step
]
==
-
2
&&
step
>
0
)
forceGroupFlags
[
step
]
=
forceGroupFlags
[
step
-
1
];
if
(
forceGroupFlags
[
step
]
!=
-
2
&&
savedForces
.
find
(
forceGroupFlags
[
step
])
==
savedForces
.
end
())
savedForces
[
forceGroupFlags
[
step
]]
=
new
CudaArray
(
cu
,
cu
.
getForce
().
getSize
(),
cu
.
getForce
().
getElementSize
(),
"savedForces"
);
}
// Allocate space for storing global values, both on the host and the device.
globalValuesFloat
.
resize
(
expressionSet
.
getNumVariables
());
globalValuesDouble
.
resize
(
expressionSet
.
getNumVariables
());
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
globalValues
=
new
CudaArray
(
cu
,
expressionSet
.
getNumVariables
(),
elementSize
,
"globalValues"
);
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
{
globalValuesDouble
[
globalVariableIndex
[
i
]]
=
initialGlobalVariables
[
i
];
expressionSet
.
setVariable
(
globalVariableIndex
[
i
],
initialGlobalVariables
[
i
]);
}
for
(
int
i
=
0
;
i
<
(
int
)
parameterVariableIndex
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
globalValuesDouble
[
parameterVariableIndex
[
i
]]
=
value
;
expressionSet
.
setVariable
(
parameterVariableIndex
[
i
],
value
);
}
// Record information about the targets of steps that will be stored in global variables.
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
||
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
{
if
(
variable
[
step
]
==
"dt"
)
stepTarget
[
step
].
type
=
DT
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
i
))
stepTarget
[
step
].
type
=
VARIABLE
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
variable
[
step
]
==
parameterNames
[
i
])
stepTarget
[
step
].
type
=
PARAMETER
;
stepTarget
[
step
].
variableIndex
=
expressionSet
.
getVariableIndex
(
variable
[
step
]);
}
}
// Identify which per-DOF steps are going to require global variables or context parameters.
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
||
stepType
[
step
]
==
CustomIntegrator
::
ComputeSum
)
{
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
if
(
usesVariable
(
expression
[
step
][
0
],
integrator
.
getGlobalVariableName
(
i
)))
needsGlobals
[
step
]
=
true
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
if
(
usesVariable
(
expression
[
step
][
0
],
parameterNames
[
i
]))
needsGlobals
[
step
]
=
true
;
}
invalidatesForces
[
step
]
=
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
||
affectsForce
.
find
(
variable
[
step
])
!=
affectsForce
.
end
());
if
(
forceGroup
[
step
]
==
-
2
&&
step
>
0
)
forceGroup
[
step
]
=
forceGroup
[
step
-
1
];
if
(
forceGroup
[
step
]
!=
-
2
&&
savedForces
.
find
(
forceGroup
[
step
])
==
savedForces
.
end
())
savedForces
[
forceGroup
[
step
]]
=
new
CudaArray
(
cu
,
cu
.
getForce
().
getSize
(),
cu
.
getForce
().
getElementSize
(),
"savedForces"
);
}
// Determine how each step will represent the position (as just a value, or a value plus a delta).
...
...
@@ -5911,9 +5950,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for
(
int
step
=
1
;
step
<
numSteps
;
step
++
)
{
if
(
needsForces
[
step
]
||
needsEnergy
[
step
])
continue
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputeGlobal
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
usesVariable
(
expression
[
step
],
"uniform"
)
&&
!
usesVariable
(
expression
[
step
],
"gaussian"
))
merged
[
step
]
=
true
;
if
(
stepType
[
step
-
1
]
==
CustomIntegrator
::
ComputePerDof
&&
stepType
[
step
]
==
CustomIntegrator
::
ComputePerDof
)
merged
[
step
]
=
true
;
}
...
...
@@ -5933,15 +5969,15 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
}
int
numGaussian
=
0
,
numUniform
=
0
;
for
(
int
j
=
step
;
j
<
numSteps
&&
(
j
==
step
||
merged
[
j
]);
j
++
)
{
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
],
"uniform"
);
numGaussian
+=
numAtoms
*
usesVariable
(
expression
[
j
]
[
0
]
,
"gaussian"
);
numUniform
+=
numAtoms
*
usesVariable
(
expression
[
j
]
[
0
]
,
"uniform"
);
compute
<<
"{
\n
"
;
if
(
numGaussian
>
0
)
compute
<<
"float4 gaussian = gaussianValues[gaussianIndex+index];
\n
"
;
if
(
numUniform
>
0
)
compute
<<
"float4 uniform = uniformValues[uniformIndex+index];
\n
"
;
for
(
int
i
=
0
;
i
<
3
;
i
++
)
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
],
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
compute
<<
createPerDofComputation
(
stepType
[
j
]
==
CustomIntegrator
::
ComputePerDof
?
variable
[
j
]
:
""
,
expression
[
j
]
[
0
]
,
i
,
integrator
,
forceName
[
j
],
energyName
[
j
]);
if
(
variable
[
j
]
==
"x"
)
{
if
(
storePosAsDelta
[
j
])
compute
<<
"posDelta[index] = convertFromDouble4(position-convertToDouble4(loadPos(posq, posqCorrection, index)));
\n
"
;
...
...
@@ -6010,14 +6046,12 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
bool
found
=
false
;
for
(
int
j
=
0
;
j
<
integrator
.
getNumGlobalVariables
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
integrator
.
getGlobalVariableName
(
j
))
{
args2
.
push_back
(
&
globalValues
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
args2
.
push_back
(
&
kineticEnergy
->
getDevicePointer
());
found
=
true
;
}
for
(
int
j
=
0
;
j
<
(
int
)
parameterNames
.
size
()
&&
!
found
;
j
++
)
if
(
variable
[
step
]
==
parameterNames
[
j
])
{
args2
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
defines
[
"SUM_OUTPUT_INDEX"
]
=
cu
.
intToString
(
j
);
args2
.
push_back
(
&
kineticEnergy
->
getDevicePointer
());
found
=
true
;
modifiesParameters
=
true
;
}
...
...
@@ -6035,7 +6069,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
stringstream
compute
;
for
(
int
i
=
step
;
i
<
numSteps
&&
(
i
==
step
||
merged
[
i
]);
i
++
)
compute
<<
"{
\n
"
<<
createGlobalComputation
(
variable
[
i
],
expression
[
i
],
integrator
,
energyName
[
i
])
<<
"}
\n
"
;
compute
<<
"{
\n
"
<<
createGlobalComputation
(
variable
[
i
],
expression
[
i
]
[
0
]
,
integrator
,
energyName
[
i
])
<<
"}
\n
"
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_STEP"
]
=
compute
.
str
();
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorGlobal
,
replacements
),
defines
);
...
...
@@ -6111,7 +6145,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
...
...
@@ -6144,7 +6177,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
sumKineticEnergyKernel
=
cu
.
getKernel
(
module
,
useDouble
?
"computeDoubleSum"
:
"computeFloatSum"
);
}
// Make sure all values (variables, parameters, etc.)
stored on the device
are up to date.
// Make sure all values (variables, parameters, etc.) are up to date.
if
(
!
deviceValuesAreCurrent
)
{
if
(
useDouble
)
...
...
@@ -6156,38 +6189,14 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
localValuesAreCurrent
=
false
;
double
stepSize
=
integrator
.
getStepSize
();
if
(
stepSize
!=
prevStepSize
)
{
if
(
useDouble
)
{
double
size
[]
=
{
0
,
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
else
{
float
size
[]
=
{
0
,
(
float
)
stepSize
};
integration
.
getStepSize
().
upload
(
size
);
}
prevStepSize
=
stepSize
;
recordGlobalValue
(
stepSize
,
GlobalTarget
(
DT
,
dtVariableIndex
));
}
bool
paramsChanged
=
false
;
if
(
useDouble
)
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesDouble
[
i
])
{
contextValuesDouble
[
i
]
=
value
;
paramsChanged
=
true
;
}
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
globalValuesDouble
[
parameterVariableIndex
[
i
]])
{
globalValuesDouble
[
parameterVariableIndex
[
i
]]
=
value
;
deviceGlobalsAreCurrent
=
false
;
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesDouble
);
}
else
{
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesFloat
[
i
])
{
contextValuesFloat
[
i
]
=
value
;
paramsChanged
=
true
;
}
}
if
(
paramsChanged
)
contextParameterValues
->
upload
(
contextValuesFloat
);
}
}
...
...
@@ -6204,7 +6213,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
int
lastForceGroups
=
context
.
getLastForceGroups
();
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
lastForceGroups
!=
forceGroup
[
i
]))
{
if
((
needsForces
[
i
]
||
needsEnergy
[
i
])
&&
(
!
forcesAreValid
||
lastForceGroups
!=
forceGroup
Flags
[
i
]))
{
if
(
forcesAreValid
&&
savedForces
.
find
(
lastForceGroups
)
!=
savedForces
.
end
())
{
// The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again.
...
...
@@ -6218,31 +6227,31 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
// Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again.
bool
computeForce
=
false
,
computeEnergy
=
false
;
for
(
int
j
=
i
;
;
j
++
)
{
if
(
needsForces
[
j
])
computeForce
=
true
;
if
(
needsEnergy
[
j
])
computeEnergy
=
true
;
if
(
invalidatesForces
[
j
])
break
;
if
(
j
==
numSteps
-
1
)
j
=
-
1
;
if
(
j
==
i
-
1
)
break
;
}
if
(
!
computeEnergy
&&
validSavedForces
.
find
(
forceGroup
[
i
])
!=
validSavedForces
.
end
())
{
bool
computeForce
=
(
needsForces
[
i
]
||
computeBothForceAndEnergy
[
i
]);
bool
computeEnergy
=
(
needsEnergy
[
i
]
||
computeBothForceAndEnergy
[
i
]);
if
(
!
computeEnergy
&&
validSavedForces
.
find
(
forceGroupFlags
[
i
])
!=
validSavedForces
.
end
())
{
// We can just restore the forces we saved earlier.
savedForces
[
forceGroup
[
i
]]
->
copyTo
(
cu
.
getForce
());
savedForces
[
forceGroup
Flags
[
i
]]
->
copyTo
(
cu
.
getForce
());
}
else
{
recordChangedParameters
(
context
);
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
Flags
[
i
]);
energyFloat
=
(
float
)
energy
;
}
forcesAreValid
=
true
;
}
if
(
needsGlobals
[
i
]
&&
!
deviceGlobalsAreCurrent
)
{
// Upload the global values to the device.
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
globalValues
->
upload
(
globalValuesDouble
);
else
{
for
(
int
j
=
0
;
j
<
(
int
)
globalValuesDouble
.
size
();
j
++
)
globalValuesFloat
[
j
]
=
(
float
)
globalValuesDouble
[
j
];
globalValues
->
upload
(
globalValuesFloat
);
}
}
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputePerDof
&&
!
merged
[
i
])
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
kernelArgs
[
i
][
0
][
1
]
=
&
posCorrection
;
...
...
@@ -6253,12 +6262,11 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
executeKernel
(
randomKernel
,
&
randomArgs
[
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeGlobal
&&
!
merged
[
i
])
{
float
uniform
=
SimTKOpenMMUtilities
::
getUniformlyDistributedRandomNumber
();
float
gauss
=
SimTKOpenMMUtilities
::
getNormallyDistributedRandomNumber
();
kernelArgs
[
i
][
0
][
3
]
=
&
uniform
;
kernelArgs
[
i
][
0
][
4
]
=
&
gauss
;
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
1
,
1
);
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeGlobal
)
{
expressionSet
.
setVariable
(
uniformVariableIndex
,
SimTKOpenMMUtilities
::
getUniformlyDistributedRandomNumber
());
expressionSet
.
setVariable
(
gaussianVariableIndex
,
SimTKOpenMMUtilities
::
getNormallyDistributedRandomNumber
());
expressionSet
.
setVariable
(
stepEnergyVariableIndex
[
i
],
energy
);
recordGlobalValue
(
globalExpressions
[
i
][
0
].
evaluate
(),
stepTarget
[
i
]);
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
ComputeSum
)
{
int
randomIndex
=
integration
.
prepareRandomNumbers
(
requiredGaussian
[
i
]);
...
...
@@ -6271,6 +6279,16 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
cu
.
clearBuffer
(
*
sumBuffer
);
cu
.
executeKernel
(
kernels
[
i
][
0
],
&
kernelArgs
[
i
][
0
][
0
],
numAtoms
);
cu
.
executeKernel
(
kernels
[
i
][
1
],
&
kernelArgs
[
i
][
1
][
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
value
;
kineticEnergy
->
download
(
&
value
);
globalValuesDouble
[
stepTarget
[
i
].
variableIndex
]
=
value
;
}
else
{
float
value
;
kineticEnergy
->
download
(
&
value
);
globalValuesDouble
[
stepTarget
[
i
].
variableIndex
]
=
value
;
}
}
else
if
(
stepType
[
i
]
==
CustomIntegrator
::
UpdateContextState
)
{
recordChangedParameters
(
context
);
...
...
@@ -6335,52 +6353,63 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
}
}
void
CudaIntegrateCustomStepKernel
::
recordGlobalValue
(
double
value
,
GlobalTarget
target
)
{
switch
(
target
.
type
)
{
case
DT
:
globalValuesDouble
[
dtVariableIndex
]
=
value
;
deviceGlobalsAreCurrent
=
false
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
double
size
[]
=
{
0
,
value
};
cu
.
getIntegrationUtilities
().
getStepSize
().
upload
(
size
);
}
else
{
float
size
[]
=
{
0
,
(
float
)
value
};
cu
.
getIntegrationUtilities
().
getStepSize
().
upload
(
size
);
}
prevStepSize
=
value
;
break
;
case
VARIABLE
:
case
PARAMETER
:
expressionSet
.
setVariable
(
target
.
variableIndex
,
value
);
globalValuesDouble
[
target
.
variableIndex
]
=
value
;
deviceGlobalsAreCurrent
=
false
;
break
;
}
}
void
CudaIntegrateCustomStepKernel
::
recordChangedParameters
(
ContextImpl
&
context
)
{
if
(
!
modifiesParameters
)
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
{
contextParameterValues
->
download
(
contextValuesDouble
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesDouble
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesDouble
[
i
]);
}
}
else
{
contextParameterValues
->
download
(
contextValuesFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
contextValuesFloat
[
i
])
context
.
setParameter
(
parameterNames
[
i
],
contextValuesFloat
[
i
]);
}
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
{
double
value
=
context
.
getParameter
(
parameterNames
[
i
]);
if
(
value
!=
globalValuesDouble
[
parameterVariableIndex
[
i
]])
context
.
setParameter
(
parameterNames
[
i
],
globalValuesDouble
[
parameterVariableIndex
[
i
]]);
}
}
void
CudaIntegrateCustomStepKernel
::
getGlobalVariables
(
ContextImpl
&
context
,
vector
<
double
>&
values
)
const
{
values
.
resize
(
numGlobalVariables
);
if
(
numGlobalVariables
==
0
)
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
globalValues
->
download
(
values
);
else
{
vector
<
float
>
buffer
;
globalValues
->
download
(
buffer
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
values
[
i
]
=
buffer
[
i
];
if
(
globalValues
==
NULL
)
{
// The data structures haven't been created yet, so just return the list of values that was given earlier.
values
=
initialGlobalVariables
;
}
values
.
resize
(
numGlobalVariables
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
values
[
i
]
=
globalValuesDouble
[
globalVariableIndex
[
i
]];
}
void
CudaIntegrateCustomStepKernel
::
setGlobalVariables
(
ContextImpl
&
context
,
const
vector
<
double
>&
values
)
{
if
(
numGlobalVariables
==
0
)
return
;
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
())
globalValues
->
upload
(
values
);
else
{
vector
<
float
>
buffer
(
numGlobalVariables
);
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
buffer
[
i
]
=
(
float
)
values
[
i
];
globalValues
->
upload
(
buffer
);
if
(
globalValues
==
NULL
)
{
// The data structures haven't been created yet, so just store the list of values.
initialGlobalVariables
=
values
;
return
;
}
for
(
int
i
=
0
;
i
<
numGlobalVariables
;
i
++
)
globalValuesDouble
[
globalVariableIndex
[
i
]]
=
values
[
i
];
deviceGlobalsAreCurrent
=
false
;
}
void
CudaIntegrateCustomStepKernel
::
getPerDofVariable
(
ContextImpl
&
context
,
int
variable
,
vector
<
Vec3
>&
values
)
const
{
...
...
platforms/cuda/src/kernels/customIntegrator.cu
View file @
59c809c0
...
...
@@ -11,7 +11,7 @@ extern "C" __global__ void computeFloatSum(const float* __restrict__ sumBuffer,
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
*
result
=
tempBuffer
[
0
];
}
extern
"C"
__global__
void
computeDoubleSum
(
const
double
*
__restrict__
sumBuffer
,
double
*
result
)
{
...
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeDoubleSum(const double* __restrict__ sumBuffer
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
result
[
SUM_OUTPUT_INDEX
]
=
tempBuffer
[
0
];
*
result
=
tempBuffer
[
0
];
}
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
59c809c0
...
...
@@ -224,7 +224,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed4
xpj2
=
make_mixed4
(
0
);
float
invMassCentral
=
params
.
x
;
float
avgMass
=
params
.
y
;
float
d2
=
params
.
z
;
float
invMassPeripheral
=
params
.
w
;
if
(
atoms
.
z
!=
-
1
)
{
pos2
=
loadPos
(
oldPos
,
posCorrection
,
atoms
.
z
);
...
...
@@ -245,9 +244,6 @@ extern "C" __global__ void applyShakeToVelocities(int numClusters, mixed tol, co
mixed
rij1sq
=
rij1
.
x
*
rij1
.
x
+
rij1
.
y
*
rij1
.
y
+
rij1
.
z
*
rij1
.
z
;
mixed
rij2sq
=
rij2
.
x
*
rij2
.
x
+
rij2
.
y
*
rij2
.
y
+
rij2
.
z
*
rij2
.
z
;
mixed
rij3sq
=
rij3
.
x
*
rij3
.
x
+
rij3
.
y
*
rij3
.
y
+
rij3
.
z
*
rij3
.
z
;
mixed
ld1
=
d2
-
rij1sq
;
mixed
ld2
=
d2
-
rij2sq
;
mixed
ld3
=
d2
-
rij3sq
;
// Iterate until convergence.
...
...
@@ -605,8 +601,6 @@ extern "C" __global__ void computeCCMAVelocityConstraintForce(const int2* __rest
if
(
threadIdx
.
x
==
0
)
groupConverged
=
1
;
__syncthreads
();
mixed
lowerTol
=
1
-
2
*
tol
+
tol
*
tol
;
mixed
upperTol
=
1
+
2
*
tol
+
tol
*
tol
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_CCMA_CONSTRAINTS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Compute the force due to this constraint.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment