Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
18295108
Commit
18295108
authored
Sep 05, 2017
by
peastman
Browse files
Merge changes from main branch
parents
e6101f68
8d7234e5
Changes
154
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
747 additions
and
247 deletions
+747
-247
platforms/cuda/include/CudaIntegrationUtilities.h
platforms/cuda/include/CudaIntegrationUtilities.h
+5
-2
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+57
-3
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+2
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+74
-34
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+62
-36
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+2
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+272
-47
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+25
-5
platforms/cuda/src/kernels/customCVForce.cu
platforms/cuda/src/kernels/customCVForce.cu
+38
-0
platforms/cuda/src/kernels/customHbondForce.cu
platforms/cuda/src/kernels/customHbondForce.cu
+9
-16
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+5
-0
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+67
-93
platforms/cuda/src/kernels/utilities.cu
platforms/cuda/src/kernels/utilities.cu
+19
-0
platforms/cuda/tests/TestCudaCustomCVForce.cpp
platforms/cuda/tests/TestCudaCustomCVForce.cpp
+36
-0
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
platforms/cuda/tests/TestCudaSort.cpp
platforms/cuda/tests/TestCudaSort.cpp
+1
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+9
-2
platforms/opencl/include/OpenCLIntegrationUtilities.h
platforms/opencl/include/OpenCLIntegrationUtilities.h
+5
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+57
-3
No files found.
platforms/cuda/include/CudaIntegrationUtilities.h
View file @
18295108
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -158,8 +158,11 @@ private:
...
@@ -158,8 +158,11 @@ private:
CudaArray
*
vsite3AvgWeights
;
CudaArray
*
vsite3AvgWeights
;
CudaArray
*
vsiteOutOfPlaneAtoms
;
CudaArray
*
vsiteOutOfPlaneAtoms
;
CudaArray
*
vsiteOutOfPlaneWeights
;
CudaArray
*
vsiteOutOfPlaneWeights
;
CudaArray
*
vsiteLocalCoordsIndex
;
CudaArray
*
vsiteLocalCoordsAtoms
;
CudaArray
*
vsiteLocalCoordsAtoms
;
CudaArray
*
vsiteLocalCoordsParams
;
CudaArray
*
vsiteLocalCoordsWeights
;
CudaArray
*
vsiteLocalCoordsPos
;
CudaArray
*
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
randomPos
;
int
lastSeed
,
numVsites
;
int
lastSeed
,
numVsites
;
double2
lastStepSize
;
double2
lastStepSize
;
...
...
platforms/cuda/include/CudaKernels.h
View file @
18295108
...
@@ -38,6 +38,7 @@
...
@@ -38,6 +38,7 @@
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include <cufft.h>
#include <cufft.h>
namespace
OpenMM
{
namespace
OpenMM
{
...
@@ -1229,6 +1230,54 @@ private:
...
@@ -1229,6 +1230,54 @@ private:
CUevent
event
;
CUevent
event
;
};
};
/**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/
class
CudaCalcCustomCVForceKernel
:
public
CalcCustomCVForceKernel
{
public:
CudaCalcCustomCVForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
CalcCustomCVForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedListeners
(
false
),
invAtomOrder
(
NULL
),
innerInvAtomOrder
(
NULL
)
{
}
~
CudaCalcCustomCVForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void
initialize
(
const
System
&
system
,
const
CustomCVForce
&
force
,
ContextImpl
&
innerContext
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
ContextImpl
&
innerContext
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void
copyState
(
ContextImpl
&
context
,
ContextImpl
&
innerContext
);
private:
class
ReorderListener
;
CudaContext
&
cu
;
bool
hasInitializedListeners
;
Lepton
::
ExpressionProgram
energyExpression
;
std
::
vector
<
std
::
string
>
variableNames
,
paramDerivNames
,
globalParameterNames
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
variableDerivExpressions
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
paramDerivExpressions
;
std
::
vector
<
CudaArray
*>
cvForces
;
CudaArray
*
invAtomOrder
;
CudaArray
*
innerInvAtomOrder
;
CUfunction
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
*/
*/
...
@@ -1485,7 +1534,9 @@ private:
...
@@ -1485,7 +1534,9 @@ private:
class
ReorderListener
;
class
ReorderListener
;
class
GlobalTarget
;
class
GlobalTarget
;
class
DerivFunction
;
class
DerivFunction
;
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
,
std
::
vector
<
const
TabulatedFunction
*>&
functions
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functionNames
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
...
@@ -1495,7 +1546,7 @@ private:
...
@@ -1495,7 +1546,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
double
energy
;
double
energy
;
float
energyFloat
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
CudaArray
*
globalValues
;
CudaArray
*
globalValues
;
...
@@ -1504,6 +1555,8 @@ private:
...
@@ -1504,6 +1555,8 @@ private:
CudaArray
*
uniformRandoms
;
CudaArray
*
uniformRandoms
;
CudaArray
*
randomSeed
;
CudaArray
*
randomSeed
;
CudaArray
*
perDofEnergyParamDerivs
;
CudaArray
*
perDofEnergyParamDerivs
;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
std
::
map
<
int
,
double
>
savedEnergy
;
std
::
map
<
int
,
CudaArray
*>
savedForces
;
std
::
map
<
int
,
CudaArray
*>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
std
::
set
<
int
>
validSavedForces
;
CudaParameterSet
*
perDofValues
;
CudaParameterSet
*
perDofValues
;
...
@@ -1587,7 +1640,7 @@ private:
...
@@ -1587,7 +1640,7 @@ private:
class
CudaApplyMonteCarloBarostatKernel
:
public
ApplyMonteCarloBarostatKernel
{
class
CudaApplyMonteCarloBarostatKernel
:
public
ApplyMonteCarloBarostatKernel
{
public:
public:
CudaApplyMonteCarloBarostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
ApplyMonteCarloBarostatKernel
(
name
,
platform
),
cu
(
cu
),
CudaApplyMonteCarloBarostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
ApplyMonteCarloBarostatKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedKernels
(
false
),
savedPositions
(
NULL
),
moleculeAtoms
(
NULL
),
moleculeStartIndex
(
NULL
)
{
hasInitializedKernels
(
false
),
savedPositions
(
NULL
),
savedForces
(
NULL
),
moleculeAtoms
(
NULL
),
moleculeStartIndex
(
NULL
)
{
}
}
~
CudaApplyMonteCarloBarostatKernel
();
~
CudaApplyMonteCarloBarostatKernel
();
/**
/**
...
@@ -1622,6 +1675,7 @@ private:
...
@@ -1622,6 +1675,7 @@ private:
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
int
numMolecules
;
int
numMolecules
;
CudaArray
*
savedPositions
;
CudaArray
*
savedPositions
;
CudaArray
*
savedForces
;
CudaArray
*
moleculeAtoms
;
CudaArray
*
moleculeAtoms
;
CudaArray
*
moleculeStartIndex
;
CudaArray
*
moleculeStartIndex
;
CUfunction
kernel
;
CUfunction
kernel
;
...
...
platforms/cuda/include/CudaPlatform.h
View file @
18295108
...
@@ -53,6 +53,7 @@ public:
...
@@ -53,6 +53,7 @@ public:
const
std
::
string
&
getPropertyValue
(
const
Context
&
context
,
const
std
::
string
&
property
)
const
;
const
std
::
string
&
getPropertyValue
(
const
Context
&
context
,
const
std
::
string
&
property
)
const
;
void
setPropertyValue
(
Context
&
context
,
const
std
::
string
&
property
,
const
std
::
string
&
value
)
const
;
void
setPropertyValue
(
Context
&
context
,
const
std
::
string
&
property
,
const
std
::
string
&
value
)
const
;
void
contextCreated
(
ContextImpl
&
context
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
properties
)
const
;
void
contextCreated
(
ContextImpl
&
context
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
properties
)
const
;
void
linkedContextCreated
(
ContextImpl
&
context
,
ContextImpl
&
originalContext
)
const
;
void
contextDestroyed
(
ContextImpl
&
context
)
const
;
void
contextDestroyed
(
ContextImpl
&
context
)
const
;
/**
/**
* This is the name of the parameter for selecting which CUDA device or devices to use.
* This is the name of the parameter for selecting which CUDA device or devices to use.
...
@@ -130,7 +131,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
...
@@ -130,7 +131,7 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
pmeStreamProperty
,
const
std
::
string
&
deterministicForcesProperty
,
int
numThreads
);
const
std
::
string
&
pmeStreamProperty
,
const
std
::
string
&
deterministicForcesProperty
,
int
numThreads
,
ContextImpl
*
originalContext
);
~
PlatformData
();
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
void
syncContexts
();
...
...
platforms/cuda/src/CudaContext.cpp
View file @
18295108
...
@@ -106,9 +106,9 @@ static int executeInWindows(const string &command) {
...
@@ -106,9 +106,9 @@ static int executeInWindows(const string &command) {
#endif
#endif
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
// Determine what compiler to use.
...
@@ -173,8 +173,10 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -173,8 +173,10 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
cacheDir
=
cacheDir
+
"/"
;
cacheDir
=
cacheDir
+
"/"
;
#endif
#endif
contextIndex
=
platformData
.
contexts
.
size
();
contextIndex
=
platformData
.
contexts
.
size
();
int
numDevices
;
string
errorMessage
=
"Error initializing Context"
;
string
errorMessage
=
"Error initializing Context"
;
if
(
originalContext
==
NULL
)
{
isLinkedContext
=
false
;
int
numDevices
;
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
...
@@ -207,6 +209,13 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -207,6 +209,13 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
throw
OpenMMException
(
"The requested CUDA device could not be loaded"
);
throw
OpenMMException
(
"The requested CUDA device could not be loaded"
);
else
else
throw
OpenMMException
(
"No compatible CUDA device is available"
);
throw
OpenMMException
(
"No compatible CUDA device is available"
);
}
else
{
isLinkedContext
=
true
;
context
=
originalContext
->
context
;
this
->
deviceIndex
=
originalContext
->
deviceIndex
;
this
->
device
=
originalContext
->
device
;
}
int
major
,
minor
;
int
major
,
minor
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
...
@@ -227,6 +236,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -227,6 +236,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor
=
3
;
minor
=
3
;
}
}
}
}
if
(
major
==
7
)
{
// Don't generate Volta-specific code until we've made the changes needed
// to support it properly.
major
=
6
;
minor
=
0
;
}
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
computeCapability
=
major
+
0.1
*
minor
;
computeCapability
=
major
+
0.1
*
minor
;
...
@@ -292,6 +307,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -292,6 +307,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
reduceEnergyKernel
=
getKernel
(
utilities
,
"reduceEnergy"
);
setChargesKernel
=
getKernel
(
utilities
,
"setCharges"
);
setChargesKernel
=
getKernel
(
utilities
,
"setCharges"
);
// Set defines based on the requested precision.
// Set defines based on the requested precision.
...
@@ -405,6 +421,8 @@ CudaContext::~CudaContext() {
...
@@ -405,6 +421,8 @@ CudaContext::~CudaContext() {
delete
force
;
delete
force
;
if
(
energyBuffer
!=
NULL
)
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
...
@@ -422,7 +440,7 @@ CudaContext::~CudaContext() {
...
@@ -422,7 +440,7 @@ CudaContext::~CudaContext() {
if
(
thread
!=
NULL
)
if
(
thread
!=
NULL
)
delete
thread
;
delete
thread
;
string
errorMessage
=
"Error deleting Context"
;
string
errorMessage
=
"Error deleting Context"
;
if
(
contextIsValid
)
{
if
(
contextIsValid
&&
!
isLinkedContext
)
{
cuProfilerStop
();
cuProfilerStop
();
CHECK_RESULT
(
cuCtxDestroy
(
context
));
CHECK_RESULT
(
cuCtxDestroy
(
context
));
}
}
...
@@ -435,16 +453,19 @@ void CudaContext::initialize() {
...
@@ -435,16 +453,19 @@ void CudaContext::initialize() {
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
{
else
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
float
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
}
...
@@ -864,6 +885,18 @@ void CudaContext::clearAutoclearBuffers() {
...
@@ -864,6 +885,18 @@ void CudaContext::clearAutoclearBuffers() {
}
}
}
}
double
CudaContext
::
reduceEnergy
()
{
int
bufferSize
=
energyBuffer
->
getSize
();
int
workGroupSize
=
512
;
void
*
args
[]
=
{
&
energyBuffer
->
getDevicePointer
(),
&
energySum
->
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
->
getElementSize
());
energySum
->
download
(
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
...
@@ -1050,9 +1083,16 @@ void CudaContext::findMoleculeGroups() {
...
@@ -1050,9 +1083,16 @@ void CudaContext::findMoleculeGroups() {
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
()
&&
identical
;
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
()
&&
identical
;
i
++
)
{
if
(
mol
.
groups
[
i
].
size
()
!=
mol2
.
groups
[
i
].
size
())
if
(
mol
.
groups
[
i
].
size
()
!=
mol2
.
groups
[
i
].
size
())
identical
=
false
;
identical
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
mol
.
groups
[
i
].
size
()
&&
identical
;
k
++
)
for
(
int
k
=
0
;
k
<
(
int
)
mol
.
groups
[
i
].
size
()
&&
identical
;
k
++
)
{
if
(
!
forces
[
i
]
->
areGroupsIdentical
(
mol
.
groups
[
i
][
k
],
mol2
.
groups
[
i
][
k
]))
if
(
!
forces
[
i
]
->
areGroupsIdentical
(
mol
.
groups
[
i
][
k
],
mol2
.
groups
[
i
][
k
]))
identical
=
false
;
identical
=
false
;
vector
<
int
>
p1
,
p2
;
forces
[
i
]
->
getParticlesInGroup
(
mol
.
groups
[
i
][
k
],
p1
);
forces
[
i
]
->
getParticlesInGroup
(
mol2
.
groups
[
i
][
k
],
p2
);
for
(
int
m
=
0
;
m
<
p1
.
size
();
m
++
)
if
(
p1
[
m
]
!=
p2
[
m
]
-
atomOffset
)
identical
=
false
;
}
}
}
if
(
identical
)
{
if
(
identical
)
{
moleculeInstances
[
j
].
push_back
(
molIndex
);
moleculeInstances
[
j
].
push_back
(
molIndex
);
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
18295108
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -103,7 +103,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
...
@@ -103,7 +103,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsParams
(
NULL
)
{
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
)
{
// Create workspace arrays.
// Create workspace arrays.
lastStepSize
=
make_double2
(
0.0
,
0.0
);
lastStepSize
=
make_double2
(
0.0
,
0.0
);
...
@@ -454,8 +455,11 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
...
@@ -454,8 +455,11 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vector
<
double4
>
vsite3AvgWeightVec
;
vector
<
double4
>
vsite3AvgWeightVec
;
vector
<
int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
int4
>
vsiteLocalCoordsAtomVec
;
vector
<
int
>
vsiteLocalCoordsIndexVec
;
vector
<
double
>
vsiteLocalCoordsParamVec
;
vector
<
int
>
vsiteLocalCoordsAtomVec
;
vector
<
int
>
vsiteLocalCoordsStartVec
;
vector
<
double
>
vsiteLocalCoordsWeightVec
;
vector
<
double4
>
vsiteLocalCoordsPosVec
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
...
@@ -480,64 +484,72 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
...
@@ -480,64 +484,72 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeightVec
.
push_back
(
make_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
vsiteOutOfPlaneWeightVec
.
push_back
(
make_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
}
}
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A
n out of plane
site.
// A
local coordinates
site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
vsiteLocalCoordsAtomVec
.
push_back
(
make_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
int
numParticles
=
site
.
getNumParticles
();
Vec3
origin
=
site
.
getOriginWeights
();
vector
<
double
>
origin
,
x
,
y
;
Vec3
x
=
site
.
getXWeights
();
site
.
getOriginWeights
(
origin
);
Vec3
y
=
site
.
getYWeights
();
site
.
getXWeights
(
x
);
site
.
getYWeights
(
y
);
vsiteLocalCoordsIndexVec
.
push_back
(
i
);
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
vsiteLocalCoordsAtomVec
.
push_back
(
site
.
getParticle
(
j
));
vsiteLocalCoordsWeightVec
.
push_back
(
origin
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
x
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
y
[
j
]);
}
Vec3
pos
=
site
.
getLocalPosition
();
Vec3
pos
=
site
.
getLocalPosition
();
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
0
]);
vsiteLocalCoordsPosVec
.
push_back
(
make_double4
(
pos
[
0
],
pos
[
1
],
pos
[
2
],
0.0
));
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
2
]);
}
}
}
}
}
}
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Atom
Vec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Pos
Vec
.
size
();
vsite2AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite2AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsite3AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteOutOfPlaneAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
numLocalCoords
),
"vsiteLocalCoordinatesAtoms"
);
vsiteLocalCoordsIndex
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
CudaArray
::
create
<
double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite2AvgWeights
=
CudaArray
::
create
<
double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
CudaArray
::
create
<
double
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
CudaArray
::
create
<
double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsParams
->
upload
(
vsiteLocalCoordsParamVec
);
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
}
}
}
else
{
else
{
vsite2AvgWeights
=
CudaArray
::
create
<
float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite2AvgWeights
=
CudaArray
::
create
<
float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
CudaArray
::
create
<
float
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
CudaArray
::
create
<
float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
if
(
num2Avg
>
0
)
{
vector
<
float2
>
floatWeights
(
num2Avg
);
vector
<
float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
...
@@ -557,10 +569,14 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
...
@@ -557,10 +569,14 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
}
}
if
(
numLocalCoords
>
0
)
{
if
(
numLocalCoords
>
0
)
{
vector
<
float
>
floatParams
(
vsiteLocalCoordsParamVec
.
size
());
vector
<
float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsParamVec
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatParams
[
i
]
=
(
float
)
vsiteLocalCoordsParamVec
[
i
];
floatWeights
[
i
]
=
(
float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsParams
->
upload
(
floatParams
);
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vector
<
float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
make_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
}
}
}
}
...
@@ -644,10 +660,16 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
...
@@ -644,10 +660,16 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete
vsiteOutOfPlaneAtoms
;
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsParams
!=
NULL
)
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsParams
;
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
}
void
CudaIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
void
CudaIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
...
@@ -747,7 +769,9 @@ void CudaIntegrationUtilities::computeVirtualSites() {
...
@@ -747,7 +769,9 @@ void CudaIntegrationUtilities::computeVirtualSites() {
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsParams
->
getDevicePointer
()};
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
context
.
executeKernel
(
vsitePositionKernel
,
args
,
numVsites
);
context
.
executeKernel
(
vsitePositionKernel
,
args
,
numVsites
);
}
}
}
}
...
@@ -759,7 +783,9 @@ void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
...
@@ -759,7 +783,9 @@ void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsParams
->
getDevicePointer
()};
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
context
.
executeKernel
(
vsiteForceKernel
,
args
,
numVsites
);
context
.
executeKernel
(
vsiteForceKernel
,
args
,
numVsites
);
}
}
}
}
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
18295108
...
@@ -108,6 +108,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -108,6 +108,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
return
new
CudaCalcCustomCVForceKernel
(
name
,
platform
,
cu
);
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
CudaCalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
return
new
CudaCalcCustomManyParticleForceKernel
(
name
,
platform
,
cu
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
18295108
This diff is collapsed.
Click to expand it.
platforms/cuda/src/CudaPlatform.cpp
View file @
18295108
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -91,6 +91,7 @@ CudaPlatform::CudaPlatform() {
...
@@ -91,6 +91,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCVForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
@@ -198,7 +199,23 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
...
@@ -198,7 +199,23 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
if
(
threadsEnv
!=
NULL
)
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
));
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
,
NULL
));
}
void
CudaPlatform
::
linkedContextCreated
(
ContextImpl
&
context
,
ContextImpl
&
originalContext
)
const
{
Platform
&
platform
=
originalContext
.
getPlatform
();
string
devicePropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaDeviceIndex
());
string
blockingPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaUseBlockingSync
());
string
precisionPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaPrecision
());
string
cpuPmePropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaUseCpuPme
());
string
compilerPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaCompiler
());
string
tempPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaTempDirectory
());
string
hostCompilerPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaHostCompiler
());
string
pmeStreamPropValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaDisablePmeStream
());
string
deterministicForcesValue
=
platform
.
getPropertyValue
(
originalContext
.
getOwner
(),
CudaDeterministicForces
());
int
threads
=
reinterpret_cast
<
PlatformData
*>
(
originalContext
.
getPlatformData
())
->
threads
.
getNumThreads
();
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
,
&
originalContext
));
}
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
@@ -208,7 +225,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...
@@ -208,7 +225,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
deterministicForcesProperty
,
int
numThreads
)
:
const
string
&
deterministicForcesProperty
,
int
numThreads
,
ContextImpl
*
originalContext
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
vector
<
string
>
devices
;
...
@@ -218,16 +235,19 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -218,16 +235,19 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
searchPos
=
nextPos
+
1
;
searchPos
=
nextPos
+
1
;
}
}
devices
.
push_back
(
deviceIndexProperty
.
substr
(
searchPos
));
devices
.
push_back
(
deviceIndexProperty
.
substr
(
searchPos
));
PlatformData
*
originalData
=
NULL
;
if
(
originalContext
!=
NULL
)
originalData
=
reinterpret_cast
<
PlatformData
*>
(
originalContext
->
getPlatformData
());
try
{
try
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
int
deviceIndex
;
int
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
contexts
.
push_back
(
new
CudaContext
(
system
,
deviceIndex
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
));
contexts
.
push_back
(
new
CudaContext
(
system
,
deviceIndex
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
,
(
originalData
==
NULL
?
NULL
:
originalData
->
contexts
[
i
])
));
}
}
}
}
if
(
contexts
.
size
()
==
0
)
if
(
contexts
.
size
()
==
0
)
contexts
.
push_back
(
new
CudaContext
(
system
,
-
1
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
));
contexts
.
push_back
(
new
CudaContext
(
system
,
-
1
,
blocking
,
precisionProperty
,
compilerProperty
,
tempProperty
,
hostCompilerProperty
,
*
this
,
(
originalData
==
NULL
?
NULL
:
originalData
->
contexts
[
0
])
));
}
}
catch
(...)
{
catch
(...)
{
// If an exception was thrown, do our best to clean up memory.
// If an exception was thrown, do our best to clean up memory.
...
...
platforms/cuda/src/kernels/customCVForce.cu
0 → 100644
View file @
18295108
/**
* Copy the positions and velocities to the inner context.
*/
extern
"C"
__global__
void
copyState
(
real4
*
posq
,
real4
*
posqCorrection
,
mixed4
*
velm
,
int
*
__restrict__
atomOrder
,
real4
*
innerPosq
,
real4
*
innerPosqCorrection
,
mixed4
*
innerVelm
,
int
*
__restrict__
innerInvAtomOrder
,
int
numAtoms
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
innerInvAtomOrder
[
atomOrder
[
i
]];
innerPosq
[
index
]
=
posq
[
i
];
innerVelm
[
index
]
=
velm
[
i
];
#ifdef USE_MIXED_PRECISION
innerPosqCorrection
[
index
]
=
posqCorrection
[
i
];
#endif
}
}
/**
* Copy the forces back to the main context.
*/
extern
"C"
__global__
void
copyForces
(
long
long
*
forces
,
int
*
__restrict__
invAtomOrder
,
long
long
*
innerForces
,
int
*
__restrict__
innerAtomOrder
,
int
numAtoms
,
int
paddedNumAtoms
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
numAtoms
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
invAtomOrder
[
innerAtomOrder
[
i
]];
forces
[
index
]
=
innerForces
[
i
];
forces
[
index
+
paddedNumAtoms
]
=
innerForces
[
i
+
paddedNumAtoms
];
forces
[
index
+
paddedNumAtoms
*
2
]
=
innerForces
[
i
+
paddedNumAtoms
*
2
];
}
}
/**
* Add all the forces from the CVs.
*/
extern
"C"
__global__
void
addForces
(
long
long
*
forces
,
int
bufferSize
PARAMETER_ARGUMENTS
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
bufferSize
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
ADD_FORCES
}
}
platforms/cuda/src/kernels/customHbondForce.cu
View file @
18295108
...
@@ -6,26 +6,17 @@ inline __device__ real3 trim(real4 v) {
...
@@ -6,26 +6,17 @@ inline __device__ real3 trim(real4 v) {
}
}
/**
/**
* This does nothing, and just exists to simply the code generation.
* This does nothing, and just exists to simpl
if
y the code generation.
*/
*/
inline
__device__
real3
trim
(
real3
v
)
{
inline
__device__
real3
trim
(
real3
v
)
{
return
v
;
return
v
;
}
}
/**
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
* Compute the difference between two vectors, optionally taking periodic boundary conditions into account
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0.0
f
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
/**
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
* and setting the fourth component to the squared magnitude.
*/
*/
inline
__device__
real4
delta
Periodic
(
real4
vec1
,
real4
vec2
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0.0
f
);
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0.0
f
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
result
)
APPLY_PERIODIC_TO_DELTA
(
result
)
...
@@ -95,6 +86,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
...
@@ -95,6 +86,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
for
(
int
acceptorStart
=
0
;
acceptorStart
<
NUM_ACCEPTORS
;
acceptorStart
+=
blockDim
.
x
)
{
for
(
int
acceptorStart
=
0
;
acceptorStart
<
NUM_ACCEPTORS
;
acceptorStart
+=
blockDim
.
x
)
{
// Load the next block of acceptors into local memory.
// Load the next block of acceptors into local memory.
__syncthreads
();
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_ACCEPTORS
-
acceptorStart
);
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_ACCEPTORS
-
acceptorStart
);
if
(
threadIdx
.
x
<
blockSize
)
{
if
(
threadIdx
.
x
<
blockSize
)
{
int4
atoms2
=
acceptorAtoms
[
acceptorStart
+
threadIdx
.
x
];
int4
atoms2
=
acceptorAtoms
[
acceptorStart
+
threadIdx
.
x
];
...
@@ -105,8 +97,8 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
...
@@ -105,8 +97,8 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
__syncthreads
();
__syncthreads
();
if
(
donorIndex
<
NUM_DONORS
)
{
if
(
donorIndex
<
NUM_DONORS
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
#ifdef USE_EXCLUSIONS
int
acceptorIndex
=
acceptorStart
+
index
;
int
acceptorIndex
=
acceptorStart
+
index
;
#ifdef USE_EXCLUSIONS
if
(
acceptorIndex
==
exclusionIndices
.
x
||
acceptorIndex
==
exclusionIndices
.
y
||
acceptorIndex
==
exclusionIndices
.
z
||
acceptorIndex
==
exclusionIndices
.
w
)
if
(
acceptorIndex
==
exclusionIndices
.
x
||
acceptorIndex
==
exclusionIndices
.
y
||
acceptorIndex
==
exclusionIndices
.
z
||
acceptorIndex
==
exclusionIndices
.
w
)
continue
;
continue
;
#endif
#endif
...
@@ -115,7 +107,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
...
@@ -115,7 +107,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
real4
a1
=
posBuffer
[
3
*
index
];
real4
a1
=
posBuffer
[
3
*
index
];
real4
a2
=
posBuffer
[
3
*
index
+
1
];
real4
a2
=
posBuffer
[
3
*
index
+
1
];
real4
a3
=
posBuffer
[
3
*
index
+
2
];
real4
a3
=
posBuffer
[
3
*
index
+
2
];
real4
deltaD1A1
=
delta
Periodic
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
real4
deltaD1A1
=
delta
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
#endif
#endif
...
@@ -183,6 +175,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
...
@@ -183,6 +175,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
for
(
int
donorStart
=
0
;
donorStart
<
NUM_DONORS
;
donorStart
+=
blockDim
.
x
)
{
for
(
int
donorStart
=
0
;
donorStart
<
NUM_DONORS
;
donorStart
+=
blockDim
.
x
)
{
// Load the next block of donors into local memory.
// Load the next block of donors into local memory.
__syncthreads
();
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_DONORS
-
donorStart
);
int
blockSize
=
min
((
int
)
blockDim
.
x
,
NUM_DONORS
-
donorStart
);
if
(
threadIdx
.
x
<
blockSize
)
{
if
(
threadIdx
.
x
<
blockSize
)
{
int4
atoms2
=
donorAtoms
[
donorStart
+
threadIdx
.
x
];
int4
atoms2
=
donorAtoms
[
donorStart
+
threadIdx
.
x
];
...
@@ -193,8 +186,8 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
...
@@ -193,8 +186,8 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
__syncthreads
();
__syncthreads
();
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
for
(
int
index
=
0
;
index
<
blockSize
;
index
++
)
{
#ifdef USE_EXCLUSIONS
int
donorIndex
=
donorStart
+
index
;
int
donorIndex
=
donorStart
+
index
;
#ifdef USE_EXCLUSIONS
if
(
donorIndex
==
exclusionIndices
.
x
||
donorIndex
==
exclusionIndices
.
y
||
donorIndex
==
exclusionIndices
.
z
||
donorIndex
==
exclusionIndices
.
w
)
if
(
donorIndex
==
exclusionIndices
.
x
||
donorIndex
==
exclusionIndices
.
y
||
donorIndex
==
exclusionIndices
.
z
||
donorIndex
==
exclusionIndices
.
w
)
continue
;
continue
;
#endif
#endif
...
@@ -203,7 +196,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
...
@@ -203,7 +196,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
real4
d1
=
posBuffer
[
3
*
index
];
real4
d1
=
posBuffer
[
3
*
index
];
real4
d2
=
posBuffer
[
3
*
index
+
1
];
real4
d2
=
posBuffer
[
3
*
index
+
1
];
real4
d3
=
posBuffer
[
3
*
index
+
2
];
real4
d3
=
posBuffer
[
3
*
index
+
2
];
real4
deltaD1A1
=
delta
Periodic
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
real4
deltaD1A1
=
delta
(
d1
,
a1
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
if
(
deltaD1A1
.
w
<
CUTOFF_SQUARED
)
{
#endif
#endif
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
18295108
...
@@ -60,6 +60,11 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -60,6 +60,11 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Determine whether a particular interaction is in the list of exclusions.
* Determine whether a particular interaction is in the list of exclusions.
*/
*/
inline
__device__
bool
isInteractionExcluded
(
int
atom1
,
int
atom2
,
const
int
*
__restrict__
exclusions
,
const
int
*
__restrict__
exclusionStartIndex
)
{
inline
__device__
bool
isInteractionExcluded
(
int
atom1
,
int
atom2
,
const
int
*
__restrict__
exclusions
,
const
int
*
__restrict__
exclusionStartIndex
)
{
if
(
atom1
>
atom2
)
{
int
temp
=
atom1
;
atom1
=
atom2
;
atom2
=
temp
;
}
int
first
=
exclusionStartIndex
[
atom1
];
int
first
=
exclusionStartIndex
[
atom1
];
int
last
=
exclusionStartIndex
[
atom1
+
1
];
int
last
=
exclusionStartIndex
[
atom1
+
1
];
for
(
int
i
=
last
-
1
;
i
>=
first
;
i
--
)
{
for
(
int
i
=
last
-
1
;
i
>=
first
;
i
--
)
{
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
18295108
...
@@ -680,7 +680,9 @@ extern "C" __global__ void updateCCMAAtomPositions(const int* __restrict__ numAt
...
@@ -680,7 +680,9 @@ extern "C" __global__ void updateCCMAAtomPositions(const int* __restrict__ numAt
extern
"C"
__global__
void
computeVirtualSites
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
extern
"C"
__global__
void
computeVirtualSites
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsParams
)
{
const
int
*
__restrict__
localCoordsIndex
,
const
int
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsWeights
,
const
real4
*
__restrict__
localCoordsPos
,
const
int
*
__restrict__
localCoordsStartIndex
)
{
// Two particle average sites.
// Two particle average sites.
...
@@ -732,30 +734,31 @@ extern "C" __global__ void computeVirtualSites(real4* __restrict__ posq, real4*
...
@@ -732,30 +734,31 @@ extern "C" __global__ void computeVirtualSites(real4* __restrict__ posq, real4*
// Local coordinates sites.
// Local coordinates sites.
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int4
atoms
=
localCoordsAtoms
[
index
];
int
siteAtomIndex
=
localCoordsIndex
[
index
];
const
real
*
params
=
&
localCoordsParams
[
12
*
index
];
int
start
=
localCoordsStartIndex
[
index
];
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
x
);
int
end
=
localCoordsStartIndex
[
index
+
1
];
mixed4
pos1_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
y
);
mixed3
origin
=
make_mixed3
(
0
),
xdir
=
make_mixed3
(
0
),
ydir
=
make_mixed3
(
0
);
mixed4
pos2_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
z
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
mixed4
pos3_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
w
);
mixed3
pos
=
trimTo3
(
loadPos
(
posq
,
posqCorrection
,
localCoordsAtoms
[
j
]));
mixed3
pos1
=
make_mixed3
(
pos1_4
.
x
,
pos1_4
.
y
,
pos1_4
.
z
);
origin
+=
pos
*
localCoordsWeights
[
3
*
j
];
mixed3
pos2
=
make_mixed3
(
pos2_4
.
x
,
pos2_4
.
y
,
pos2_4
.
z
);
xdir
+=
pos
*
localCoordsWeights
[
3
*
j
+
1
];
mixed3
pos3
=
make_mixed3
(
pos3_4
.
x
,
pos3_4
.
y
,
pos3_4
.
z
);
ydir
+=
pos
*
localCoordsWeights
[
3
*
j
+
2
];
mixed3
originWeights
=
make_mixed3
(
params
[
0
],
params
[
1
],
params
[
2
]);
}
mixed3
xWeights
=
make_mixed3
(
params
[
3
],
params
[
4
],
params
[
5
]);
mixed3
yWeights
=
make_mixed3
(
params
[
6
],
params
[
7
],
params
[
8
]);
mixed3
localPosition
=
make_mixed3
(
params
[
9
],
params
[
10
],
params
[
11
]);
mixed3
origin
=
pos1
*
originWeights
.
x
+
pos2
*
originWeights
.
y
+
pos3
*
originWeights
.
z
;
mixed3
xdir
=
pos1
*
xWeights
.
x
+
pos2
*
xWeights
.
y
+
pos3
*
xWeights
.
z
;
mixed3
ydir
=
pos1
*
yWeights
.
x
+
pos2
*
yWeights
.
y
+
pos3
*
yWeights
.
z
;
mixed3
zdir
=
cross
(
xdir
,
ydir
);
mixed3
zdir
=
cross
(
xdir
,
ydir
);
xdir
*=
rsqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
normXdir
=
sqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
zdir
*=
rsqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
normZdir
=
sqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
invNormXdir
=
(
normXdir
>
0
?
1
/
normXdir
:
0
);
mixed
invNormZdir
=
(
normZdir
>
0
?
1
/
normZdir
:
0
);
xdir
*=
invNormXdir
;
zdir
*=
invNormZdir
;
ydir
=
cross
(
zdir
,
xdir
);
ydir
=
cross
(
zdir
,
xdir
);
real4
localPosition_4
=
localCoordsPos
[
index
];
mixed3
localPosition
=
make_mixed3
(
localPosition_4
.
x
,
localPosition_4
.
y
,
localPosition_4
.
z
);
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
siteAtomIndex
);
pos
.
x
=
origin
.
x
+
xdir
.
x
*
localPosition
.
x
+
ydir
.
x
*
localPosition
.
y
+
zdir
.
x
*
localPosition
.
z
;
pos
.
x
=
origin
.
x
+
xdir
.
x
*
localPosition
.
x
+
ydir
.
x
*
localPosition
.
y
+
zdir
.
x
*
localPosition
.
z
;
pos
.
y
=
origin
.
y
+
xdir
.
y
*
localPosition
.
x
+
ydir
.
y
*
localPosition
.
y
+
zdir
.
y
*
localPosition
.
z
;
pos
.
y
=
origin
.
y
+
xdir
.
y
*
localPosition
.
x
+
ydir
.
y
*
localPosition
.
y
+
zdir
.
y
*
localPosition
.
z
;
pos
.
z
=
origin
.
z
+
xdir
.
z
*
localPosition
.
x
+
ydir
.
z
*
localPosition
.
y
+
zdir
.
z
*
localPosition
.
z
;
pos
.
z
=
origin
.
z
+
xdir
.
z
*
localPosition
.
x
+
ydir
.
z
*
localPosition
.
y
+
zdir
.
z
*
localPosition
.
z
;
storePos
(
posq
,
posqCorrection
,
atoms
.
x
,
pos
);
storePos
(
posq
,
posqCorrection
,
siteAtomInde
x
,
pos
);
}
}
}
}
...
@@ -778,7 +781,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
...
@@ -778,7 +781,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsParams
)
{
const
int
*
__restrict__
localCoordsIndex
,
const
int
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsWeights
,
const
real4
*
__restrict__
localCoordsPos
,
const
int
*
__restrict__
localCoordsStartIndex
)
{
// Two particle average sites.
// Two particle average sites.
...
@@ -826,87 +831,56 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
...
@@ -826,87 +831,56 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
// Local coordinates sites.
// Local coordinates sites.
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int4
atoms
=
localCoordsAtoms
[
index
];
int
siteAtomIndex
=
localCoordsIndex
[
index
];
const
real
*
params
=
&
localCoordsParams
[
12
*
index
];
int
start
=
localCoordsStartIndex
[
index
];
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
x
);
int
end
=
localCoordsStartIndex
[
index
+
1
];
mixed4
pos1_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
y
);
mixed3
origin
=
make_mixed3
(
0
),
xdir
=
make_mixed3
(
0
),
ydir
=
make_mixed3
(
0
);
mixed4
pos2_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
z
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
mixed4
pos3_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
w
);
mixed3
pos
=
trimTo3
(
loadPos
(
posq
,
posqCorrection
,
localCoordsAtoms
[
j
]));
mixed3
pos1
=
make_mixed3
(
pos1_4
.
x
,
pos1_4
.
y
,
pos1_4
.
z
);
origin
+=
pos
*
localCoordsWeights
[
3
*
j
];
mixed3
pos2
=
make_mixed3
(
pos2_4
.
x
,
pos2_4
.
y
,
pos2_4
.
z
);
xdir
+=
pos
*
localCoordsWeights
[
3
*
j
+
1
];
mixed3
pos3
=
make_mixed3
(
pos3_4
.
x
,
pos3_4
.
y
,
pos3_4
.
z
);
ydir
+=
pos
*
localCoordsWeights
[
3
*
j
+
2
];
mixed3
originWeights
=
make_mixed3
(
params
[
0
],
params
[
1
],
params
[
2
]);
}
mixed3
wx
=
make_mixed3
(
params
[
3
],
params
[
4
],
params
[
5
]);
mixed3
wy
=
make_mixed3
(
params
[
6
],
params
[
7
],
params
[
8
]);
mixed3
localPosition
=
make_mixed3
(
params
[
9
],
params
[
10
],
params
[
11
]);
mixed3
origin
=
pos1
*
originWeights
.
x
+
pos2
*
originWeights
.
y
+
pos3
*
originWeights
.
z
;
mixed3
xdir
=
pos1
*
wx
.
x
+
pos2
*
wx
.
y
+
pos3
*
wx
.
z
;
mixed3
ydir
=
pos1
*
wy
.
x
+
pos2
*
wy
.
y
+
pos3
*
wy
.
z
;
mixed3
zdir
=
cross
(
xdir
,
ydir
);
mixed3
zdir
=
cross
(
xdir
,
ydir
);
mixed
invNormXdir
=
rsqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
normXdir
=
sqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
invNormZdir
=
rsqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
normZdir
=
sqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
invNormXdir
=
(
normXdir
>
0
?
1
/
normXdir
:
0
);
mixed
invNormZdir
=
(
normZdir
>
0
?
1
/
normZdir
:
0
);
mixed3
dx
=
xdir
*
invNormXdir
;
mixed3
dx
=
xdir
*
invNormXdir
;
mixed3
dz
=
zdir
*
invNormZdir
;
mixed3
dz
=
zdir
*
invNormZdir
;
mixed3
dy
=
cross
(
dz
,
dx
);
mixed3
dy
=
cross
(
dz
,
dx
);
real4
localPosition_4
=
localCoordsPos
[
index
];
mixed3
localPosition
=
make_mixed3
(
localPosition_4
.
x
,
localPosition_4
.
y
,
localPosition_4
.
z
);
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
mixed
t11
=
(
wx
.
x
*
ydir
.
x
-
wy
.
x
*
xdir
.
x
)
*
invNormZdir
;
real3
f
=
loadForce
(
siteAtomIndex
,
force
);
mixed
t12
=
(
wx
.
x
*
ydir
.
y
-
wy
.
x
*
xdir
.
y
)
*
invNormZdir
;
mixed
t13
=
(
wx
.
x
*
ydir
.
z
-
wy
.
x
*
xdir
.
z
)
*
invNormZdir
;
mixed
t21
=
(
wx
.
y
*
ydir
.
x
-
wy
.
y
*
xdir
.
x
)
*
invNormZdir
;
mixed
t22
=
(
wx
.
y
*
ydir
.
y
-
wy
.
y
*
xdir
.
y
)
*
invNormZdir
;
mixed
t23
=
(
wx
.
y
*
ydir
.
z
-
wy
.
y
*
xdir
.
z
)
*
invNormZdir
;
mixed
t31
=
(
wx
.
z
*
ydir
.
x
-
wy
.
z
*
xdir
.
x
)
*
invNormZdir
;
mixed
t32
=
(
wx
.
z
*
ydir
.
y
-
wy
.
z
*
xdir
.
y
)
*
invNormZdir
;
mixed
t33
=
(
wx
.
z
*
ydir
.
z
-
wy
.
z
*
xdir
.
z
)
*
invNormZdir
;
mixed
sx1
=
t13
*
dz
.
y
-
t12
*
dz
.
z
;
mixed
sy1
=
t11
*
dz
.
z
-
t13
*
dz
.
x
;
mixed
sz1
=
t12
*
dz
.
x
-
t11
*
dz
.
y
;
mixed
sx2
=
t23
*
dz
.
y
-
t22
*
dz
.
z
;
mixed
sy2
=
t21
*
dz
.
z
-
t23
*
dz
.
x
;
mixed
sz2
=
t22
*
dz
.
x
-
t21
*
dz
.
y
;
mixed
sx3
=
t33
*
dz
.
y
-
t32
*
dz
.
z
;
mixed
sy3
=
t31
*
dz
.
z
-
t33
*
dz
.
x
;
mixed
sz3
=
t32
*
dz
.
x
-
t31
*
dz
.
y
;
mixed3
wxScaled
=
wx
*
invNormXdir
;
real3
f
=
loadForce
(
atoms
.
x
,
force
);
mixed3
fp1
=
localPosition
*
f
.
x
;
mixed3
fp1
=
localPosition
*
f
.
x
;
mixed3
fp2
=
localPosition
*
f
.
y
;
mixed3
fp2
=
localPosition
*
f
.
y
;
mixed3
fp3
=
localPosition
*
f
.
z
;
mixed3
fp3
=
localPosition
*
f
.
z
;
real3
f1
=
make_real3
(
0
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
real3
f2
=
make_real3
(
0
);
real
originWeight
=
localCoordsWeights
[
3
*
j
];
real3
f3
=
make_real3
(
0
);
real
wx
=
localCoordsWeights
[
3
*
j
+
1
];
f1
.
x
+=
fp1
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx1
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
x
+
dy
.
x
*
sx1
-
dx
.
y
*
t12
-
dx
.
z
*
t13
)
+
f
.
x
*
originWeights
.
x
;
real
wy
=
localCoordsWeights
[
3
*
j
+
2
];
f1
.
y
+=
fp1
.
x
*
wxScaled
.
x
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy1
+
t13
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
x
+
dy
.
x
*
sy1
+
dx
.
y
*
t11
);
mixed
wxScaled
=
wx
*
invNormXdir
;
f1
.
z
+=
fp1
.
x
*
wxScaled
.
x
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz1
-
t12
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
x
+
dy
.
x
*
sz1
+
dx
.
z
*
t11
);
mixed
t1
=
(
wx
*
ydir
.
x
-
wy
*
xdir
.
x
)
*
invNormZdir
;
f2
.
x
+=
fp1
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx2
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
y
+
dy
.
x
*
sx2
-
dx
.
y
*
t22
-
dx
.
z
*
t23
)
+
f
.
x
*
originWeights
.
y
;
mixed
t2
=
(
wx
*
ydir
.
y
-
wy
*
xdir
.
y
)
*
invNormZdir
;
f2
.
y
+=
fp1
.
x
*
wxScaled
.
y
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy2
+
t23
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
y
+
dy
.
x
*
sy2
+
dx
.
y
*
t21
);
mixed
t3
=
(
wx
*
ydir
.
z
-
wy
*
xdir
.
z
)
*
invNormZdir
;
f2
.
z
+=
fp1
.
x
*
wxScaled
.
y
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz2
-
t22
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
y
+
dy
.
x
*
sz2
+
dx
.
z
*
t21
);
mixed
sx
=
t3
*
dz
.
y
-
t2
*
dz
.
z
;
f3
.
x
+=
fp1
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx3
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
z
+
dy
.
x
*
sx3
-
dx
.
y
*
t32
-
dx
.
z
*
t33
)
+
f
.
x
*
originWeights
.
z
;
mixed
sy
=
t1
*
dz
.
z
-
t3
*
dz
.
x
;
f3
.
y
+=
fp1
.
x
*
wxScaled
.
z
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy3
+
t33
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
z
+
dy
.
x
*
sy3
+
dx
.
y
*
t31
);
mixed
sz
=
t2
*
dz
.
x
-
t1
*
dz
.
y
;
f3
.
z
+=
fp1
.
x
*
wxScaled
.
z
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz3
-
t32
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
z
+
dy
.
x
*
sz3
+
dx
.
z
*
t31
);
real3
fresult
=
make_real3
(
0
);
f1
.
x
+=
fp2
.
x
*
wxScaled
.
x
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx1
-
t13
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
x
-
dy
.
y
*
sx1
-
dx
.
x
*
t12
);
fresult
.
x
+=
fp1
.
x
*
wxScaled
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
+
dy
.
x
*
sx
-
dx
.
y
*
t2
-
dx
.
z
*
t3
)
+
f
.
x
*
originWeight
;
f1
.
y
+=
fp2
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy1
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
x
-
dy
.
y
*
sy1
+
dx
.
x
*
t11
+
dx
.
z
*
t13
)
+
f
.
y
*
originWeights
.
x
;
fresult
.
y
+=
fp1
.
x
*
wxScaled
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy
+
t3
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
+
dy
.
x
*
sy
+
dx
.
y
*
t1
);
f1
.
z
+=
fp2
.
x
*
wxScaled
.
x
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz1
+
t11
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
x
-
dy
.
y
*
sz1
-
dx
.
z
*
t12
);
fresult
.
z
+=
fp1
.
x
*
wxScaled
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz
-
t2
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
+
dy
.
x
*
sz
+
dx
.
z
*
t1
);
f2
.
x
+=
fp2
.
x
*
wxScaled
.
y
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx2
-
t23
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
y
-
dy
.
y
*
sx2
-
dx
.
x
*
t22
);
fresult
.
x
+=
fp2
.
x
*
wxScaled
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx
-
t3
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
-
dy
.
y
*
sx
-
dx
.
x
*
t2
);
f2
.
y
+=
fp2
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy2
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
y
-
dy
.
y
*
sy2
+
dx
.
x
*
t21
+
dx
.
z
*
t23
)
+
f
.
y
*
originWeights
.
y
;
fresult
.
y
+=
fp2
.
x
*
wxScaled
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
-
dy
.
y
*
sy
+
dx
.
x
*
t1
+
dx
.
z
*
t3
)
+
f
.
y
*
originWeight
;
f2
.
z
+=
fp2
.
x
*
wxScaled
.
y
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz2
+
t21
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
y
-
dy
.
y
*
sz2
-
dx
.
z
*
t22
);
fresult
.
z
+=
fp2
.
x
*
wxScaled
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz
+
t1
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
-
dy
.
y
*
sz
-
dx
.
z
*
t2
);
f3
.
x
+=
fp2
.
x
*
wxScaled
.
z
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx3
-
t33
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
z
-
dy
.
y
*
sx3
-
dx
.
x
*
t32
);
fresult
.
x
+=
fp3
.
x
*
wxScaled
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx
+
t2
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
+
dy
.
z
*
sx
+
dx
.
x
*
t3
);
f3
.
y
+=
fp2
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy3
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
z
-
dy
.
y
*
sy3
+
dx
.
x
*
t31
+
dx
.
z
*
t33
)
+
f
.
y
*
originWeights
.
z
;
fresult
.
y
+=
fp3
.
x
*
wxScaled
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy
-
t1
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
+
dy
.
z
*
sy
+
dx
.
y
*
t3
);
f3
.
z
+=
fp2
.
x
*
wxScaled
.
z
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz3
+
t31
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
z
-
dy
.
y
*
sz3
-
dx
.
z
*
t32
);
fresult
.
z
+=
fp3
.
x
*
wxScaled
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
+
dy
.
z
*
sz
-
dx
.
x
*
t1
-
dx
.
y
*
t2
)
+
f
.
z
*
originWeight
;
f1
.
x
+=
fp3
.
x
*
wxScaled
.
x
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx1
+
t12
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
x
+
dy
.
z
*
sx1
+
dx
.
x
*
t13
);
addForce
(
localCoordsAtoms
[
j
],
force
,
fresult
);
f1
.
y
+=
fp3
.
x
*
wxScaled
.
x
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy1
-
t11
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
x
+
dy
.
z
*
sy1
+
dx
.
y
*
t13
);
}
f1
.
z
+=
fp3
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz1
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
x
+
dy
.
z
*
sz1
-
dx
.
x
*
t11
-
dx
.
y
*
t12
)
+
f
.
z
*
originWeights
.
x
;
f2
.
x
+=
fp3
.
x
*
wxScaled
.
y
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx2
+
t22
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
y
+
dy
.
z
*
sx2
+
dx
.
x
*
t23
);
f2
.
y
+=
fp3
.
x
*
wxScaled
.
y
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy2
-
t21
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
y
+
dy
.
z
*
sy2
+
dx
.
y
*
t23
);
f2
.
z
+=
fp3
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz2
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
y
+
dy
.
z
*
sz2
-
dx
.
x
*
t21
-
dx
.
y
*
t22
)
+
f
.
z
*
originWeights
.
y
;
f3
.
x
+=
fp3
.
x
*
wxScaled
.
z
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx3
+
t32
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
z
+
dy
.
z
*
sx3
+
dx
.
x
*
t33
);
f3
.
y
+=
fp3
.
x
*
wxScaled
.
z
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy3
-
t31
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
z
+
dy
.
z
*
sy3
+
dx
.
y
*
t33
);
f3
.
z
+=
fp3
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz3
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
z
+
dy
.
z
*
sz3
-
dx
.
x
*
t31
-
dx
.
y
*
t32
)
+
f
.
z
*
originWeights
.
z
;
addForce
(
atoms
.
y
,
force
,
f1
);
addForce
(
atoms
.
z
,
force
,
f2
);
addForce
(
atoms
.
w
,
force
,
f3
);
}
}
}
}
...
...
platforms/cuda/src/kernels/utilities.cu
View file @
18295108
...
@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
...
@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer
(
buffer6
,
size6
);
clearSingleBuffer
(
buffer6
,
size6
);
}
}
/**
* Sum the energy buffer.
*/
__global__
void
reduceEnergy
(
const
mixed
*
__restrict__
energyBuffer
,
mixed
*
__restrict__
result
,
int
bufferSize
,
int
workGroupSize
)
{
extern
__shared__
mixed
tempBuffer
[];
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
bufferSize
;
index
+=
blockDim
.
x
)
sum
+=
energyBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
workGroupSize
)
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
}
/**
/**
* Record the atomic charges into the posq array.
* Record the atomic charges into the posq array.
*/
*/
...
...
platforms/cuda/tests/TestCudaCustomCVForce.cpp
0 → 100644
View file @
18295108
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestCustomCVForce.h"
void
runPlatformTests
()
{
}
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
18295108
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
,
NULL
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaRandom.cpp
View file @
18295108
...
@@ -56,7 +56,7 @@ void testGaussian() {
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
,
NULL
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/cuda/tests/TestCudaSort.cpp
View file @
18295108
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
...
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
,
NULL
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
CudaArray
data
(
context
,
array
.
size
(),
4
,
"sortData"
);
...
...
platforms/opencl/include/OpenCLContext.h
View file @
18295108
...
@@ -163,7 +163,8 @@ public:
...
@@ -163,7 +163,8 @@ public:
class
ForcePostComputation
;
class
ForcePostComputation
;
static
const
int
ThreadBlockSize
;
static
const
int
ThreadBlockSize
;
static
const
int
TileSize
;
static
const
int
TileSize
;
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
std
::
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
);
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
std
::
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
);
~
OpenCLContext
();
~
OpenCLContext
();
/**
/**
* This is called to initialize internal data structures after all Forces in the system
* This is called to initialize internal data structures after all Forces in the system
...
@@ -363,9 +364,13 @@ public:
...
@@ -363,9 +364,13 @@ public:
*/
*/
void
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
);
void
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
);
/**
/**
* Sum the buffe
s
r containing forces.
* Sum the buffer
s
containing forces.
*/
*/
void
reduceForces
();
void
reduceForces
();
/**
* Sum the buffer containing energy.
*/
double
reduceEnergy
();
/**
/**
* Get the current simulation time.
* Get the current simulation time.
*/
*/
...
@@ -749,6 +754,7 @@ private:
...
@@ -749,6 +754,7 @@ private:
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceEnergyKernel
;
cl
::
Kernel
setChargesKernel
;
cl
::
Kernel
setChargesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
...
@@ -763,6 +769,7 @@ private:
...
@@ -763,6 +769,7 @@ private:
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energySum
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
chargeBuffer
;
OpenCLArray
*
chargeBuffer
;
...
...
platforms/opencl/include/OpenCLIntegrationUtilities.h
View file @
18295108
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -156,8 +156,11 @@ private:
...
@@ -156,8 +156,11 @@ private:
OpenCLArray
*
vsite3AvgWeights
;
OpenCLArray
*
vsite3AvgWeights
;
OpenCLArray
*
vsiteOutOfPlaneAtoms
;
OpenCLArray
*
vsiteOutOfPlaneAtoms
;
OpenCLArray
*
vsiteOutOfPlaneWeights
;
OpenCLArray
*
vsiteOutOfPlaneWeights
;
OpenCLArray
*
vsiteLocalCoordsIndex
;
OpenCLArray
*
vsiteLocalCoordsAtoms
;
OpenCLArray
*
vsiteLocalCoordsAtoms
;
OpenCLArray
*
vsiteLocalCoordsParams
;
OpenCLArray
*
vsiteLocalCoordsWeights
;
OpenCLArray
*
vsiteLocalCoordsPos
;
OpenCLArray
*
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
randomPos
;
int
lastSeed
,
numVsites
;
int
lastSeed
,
numVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
,
hasOverlappingVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
,
hasOverlappingVsites
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
18295108
...
@@ -37,6 +37,7 @@
...
@@ -37,6 +37,7 @@
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include "openmm/System.h"
#include "openmm/System.h"
namespace
OpenMM
{
namespace
OpenMM
{
...
@@ -1207,6 +1208,54 @@ private:
...
@@ -1207,6 +1208,54 @@ private:
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
,
torqueKernel
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
,
torqueKernel
;
};
};
/**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/
class
OpenCLCalcCustomCVForceKernel
:
public
CalcCustomCVForceKernel
{
public:
OpenCLCalcCustomCVForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcCustomCVForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
invAtomOrder
(
NULL
),
innerInvAtomOrder
(
NULL
)
{
}
~
OpenCLCalcCustomCVForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void
initialize
(
const
System
&
system
,
const
CustomCVForce
&
force
,
ContextImpl
&
innerContext
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
ContextImpl
&
innerContext
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void
copyState
(
ContextImpl
&
context
,
ContextImpl
&
innerContext
);
private:
class
ReorderListener
;
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
Lepton
::
ExpressionProgram
energyExpression
;
std
::
vector
<
std
::
string
>
variableNames
,
paramDerivNames
,
globalParameterNames
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
variableDerivExpressions
;
std
::
vector
<
Lepton
::
ExpressionProgram
>
paramDerivExpressions
;
std
::
vector
<
OpenCLArray
*>
cvForces
;
OpenCLArray
*
invAtomOrder
;
OpenCLArray
*
innerInvAtomOrder
;
cl
::
Kernel
copyStateKernel
,
copyForcesKernel
,
addForcesKernel
;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
*/
*/
...
@@ -1472,7 +1521,9 @@ private:
...
@@ -1472,7 +1521,9 @@ private:
class
ReorderListener
;
class
ReorderListener
;
class
GlobalTarget
;
class
GlobalTarget
;
class
DerivFunction
;
class
DerivFunction
;
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
);
std
::
string
createPerDofComputation
(
const
std
::
string
&
variable
,
const
Lepton
::
ParsedExpression
&
expr
,
int
component
,
CustomIntegrator
&
integrator
,
const
std
::
string
&
forceName
,
const
std
::
string
&
energyName
,
std
::
vector
<
const
TabulatedFunction
*>&
functions
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functionNames
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
Lepton
::
ExpressionTreeNode
replaceDerivFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
OpenMM
::
ContextImpl
&
context
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
void
findExpressionsForDerivs
(
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variableNodes
);
...
@@ -1482,7 +1533,7 @@ private:
...
@@ -1482,7 +1533,7 @@ private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
double
energy
;
double
energy
;
float
energyFloat
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
,
sumWorkGroupSize
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
deviceGlobalsAreCurrent
,
modifiesParameters
,
keNeedsForce
,
hasAnyConstraints
,
needsEnergyParamDerivs
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
OpenCLArray
*
globalValues
;
OpenCLArray
*
globalValues
;
...
@@ -1491,6 +1542,8 @@ private:
...
@@ -1491,6 +1542,8 @@ private:
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
uniformRandoms
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
randomSeed
;
OpenCLArray
*
perDofEnergyParamDerivs
;
OpenCLArray
*
perDofEnergyParamDerivs
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
std
::
map
<
int
,
double
>
savedEnergy
;
std
::
map
<
int
,
OpenCLArray
*>
savedForces
;
std
::
map
<
int
,
OpenCLArray
*>
savedForces
;
std
::
set
<
int
>
validSavedForces
;
std
::
set
<
int
>
validSavedForces
;
OpenCLParameterSet
*
perDofValues
;
OpenCLParameterSet
*
perDofValues
;
...
@@ -1573,7 +1626,7 @@ private:
...
@@ -1573,7 +1626,7 @@ private:
class
OpenCLApplyMonteCarloBarostatKernel
:
public
ApplyMonteCarloBarostatKernel
{
class
OpenCLApplyMonteCarloBarostatKernel
:
public
ApplyMonteCarloBarostatKernel
{
public:
public:
OpenCLApplyMonteCarloBarostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
ApplyMonteCarloBarostatKernel
(
name
,
platform
),
cl
(
cl
),
OpenCLApplyMonteCarloBarostatKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
ApplyMonteCarloBarostatKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
savedPositions
(
NULL
),
moleculeAtoms
(
NULL
),
moleculeStartIndex
(
NULL
)
{
hasInitializedKernels
(
false
),
savedPositions
(
NULL
),
savedForces
(
NULL
),
moleculeAtoms
(
NULL
),
moleculeStartIndex
(
NULL
)
{
}
}
~
OpenCLApplyMonteCarloBarostatKernel
();
~
OpenCLApplyMonteCarloBarostatKernel
();
/**
/**
...
@@ -1608,6 +1661,7 @@ private:
...
@@ -1608,6 +1661,7 @@ private:
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
int
numMolecules
;
int
numMolecules
;
OpenCLArray
*
savedPositions
;
OpenCLArray
*
savedPositions
;
OpenCLArray
*
savedForces
;
OpenCLArray
*
moleculeAtoms
;
OpenCLArray
*
moleculeAtoms
;
OpenCLArray
*
moleculeStartIndex
;
OpenCLArray
*
moleculeStartIndex
;
cl
::
Kernel
kernel
;
cl
::
Kernel
kernel
;
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment