Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
18295108
Commit
18295108
authored
Sep 05, 2017
by
peastman
Browse files
Merge changes from main branch
parents
e6101f68
8d7234e5
Changes
154
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
755 additions
and
214 deletions
+755
-214
platforms/opencl/include/OpenCLPlatform.h
platforms/opencl/include/OpenCLPlatform.h
+2
-1
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+52
-6
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+64
-36
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+278
-47
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+22
-5
platforms/opencl/src/kernels/customCVForce.cl
platforms/opencl/src/kernels/customCVForce.cl
+38
-0
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+8
-15
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+5
-0
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+18
-0
platforms/opencl/src/kernels/virtualSites.cl
platforms/opencl/src/kernels/virtualSites.cl
+72
-100
platforms/opencl/tests/TestOpenCLCustomCVForce.cpp
platforms/opencl/tests/TestOpenCLCustomCVForce.cpp
+36
-0
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+1
-1
platforms/opencl/tests/TestOpenCLRandom.cpp
platforms/opencl/tests/TestOpenCLRandom.cpp
+1
-1
platforms/opencl/tests/TestOpenCLSort.cpp
platforms/opencl/tests/TestOpenCLSort.cpp
+1
-1
platforms/reference/include/ReferenceCustomCVForce.h
platforms/reference/include/ReferenceCustomCVForce.h
+72
-0
platforms/reference/include/ReferenceKernels.h
platforms/reference/include/ReferenceKernels.h
+39
-0
platforms/reference/src/ReferenceKernelFactory.cpp
platforms/reference/src/ReferenceKernelFactory.cpp
+2
-0
platforms/reference/src/ReferenceKernels.cpp
platforms/reference/src/ReferenceKernels.cpp
+41
-1
platforms/reference/src/ReferencePlatform.cpp
platforms/reference/src/ReferencePlatform.cpp
+1
-0
No files found.
platforms/opencl/include/OpenCLPlatform.h
View file @
18295108
...
@@ -53,6 +53,7 @@ public:
...
@@ -53,6 +53,7 @@ public:
const
std
::
string
&
getPropertyValue
(
const
Context
&
context
,
const
std
::
string
&
property
)
const
;
const
std
::
string
&
getPropertyValue
(
const
Context
&
context
,
const
std
::
string
&
property
)
const
;
void
setPropertyValue
(
Context
&
context
,
const
std
::
string
&
property
,
const
std
::
string
&
value
)
const
;
void
setPropertyValue
(
Context
&
context
,
const
std
::
string
&
property
,
const
std
::
string
&
value
)
const
;
void
contextCreated
(
ContextImpl
&
context
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
properties
)
const
;
void
contextCreated
(
ContextImpl
&
context
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
properties
)
const
;
void
linkedContextCreated
(
ContextImpl
&
context
,
ContextImpl
&
originalContext
)
const
;
void
contextDestroyed
(
ContextImpl
&
context
)
const
;
void
contextDestroyed
(
ContextImpl
&
context
)
const
;
/**
/**
* This is the name of the parameter for selecting which OpenCL device or devices to use.
* This is the name of the parameter for selecting which OpenCL device or devices to use.
...
@@ -108,7 +109,7 @@ public:
...
@@ -108,7 +109,7 @@ public:
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
class
OPENMM_EXPORT_OPENCL
OpenCLPlatform
::
PlatformData
{
public:
public:
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
PlatformData
(
const
System
&
system
,
const
std
::
string
&
platformPropValue
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
,
ContextImpl
*
originalContext
);
~
PlatformData
();
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
void
syncContexts
();
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
18295108
...
@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
std
::
cerr
<<
"OpenCL internal error: "
<<
errinfo
<<
std
::
endl
;
std
::
cerr
<<
"OpenCL internal error: "
<<
errinfo
<<
std
::
endl
;
}
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
...
@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
vector
<
cl
::
Device
>
contextDevices
;
vector
<
cl
::
Device
>
contextDevices
;
contextDevices
.
push_back
(
device
);
contextDevices
.
push_back
(
device
);
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
bestPlatform
](),
0
};
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
bestPlatform
](),
0
};
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
if
(
originalContext
==
NULL
)
{
defaultQueue
=
cl
::
CommandQueue
(
context
,
device
);
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
defaultQueue
=
cl
::
CommandQueue
(
context
,
device
);
}
else
{
context
=
originalContext
->
context
;
defaultQueue
=
originalContext
->
defaultQueue
;
}
currentQueue
=
defaultQueue
;
currentQueue
=
defaultQueue
;
numAtoms
=
system
.
getNumParticles
();
numAtoms
=
system
.
getNumParticles
();
paddedNumAtoms
=
TileSize
*
((
numAtoms
+
TileSize
-
1
)
/
TileSize
);
paddedNumAtoms
=
TileSize
*
((
numAtoms
+
TileSize
-
1
)
/
TileSize
);
...
@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
reduceReal4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceReal4Buffer"
);
reduceReal4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceReal4Buffer"
);
if
(
supports64BitGlobalAtomics
)
if
(
supports64BitGlobalAtomics
)
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceEnergyKernel
=
cl
::
Kernel
(
utilities
,
"reduceEnergy"
);
setChargesKernel
=
cl
::
Kernel
(
utilities
,
"setCharges"
);
setChargesKernel
=
cl
::
Kernel
(
utilities
,
"setCharges"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
...
@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
delete
longForceBuffer
;
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
...
@@ -465,11 +474,19 @@ void OpenCLContext::initialize() {
...
@@ -465,11 +474,19 @@ void OpenCLContext::initialize() {
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
}
else
{
else
if
(
useMixedPrecision
)
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
}
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
...
@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
...
@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
}
}
double
OpenCLContext
::
reduceEnergy
()
{
int
workGroupSize
=
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
workGroupSize
>
512
)
workGroupSize
=
512
;
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
->
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
->
getElementSize
(),
NULL
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
double
energy
;
energySum
->
download
(
&
energy
);
return
energy
;
}
else
{
float
energy
;
energySum
->
download
(
&
energy
);
return
energy
;
}
}
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
...
@@ -939,9 +978,16 @@ void OpenCLContext::findMoleculeGroups() {
...
@@ -939,9 +978,16 @@ void OpenCLContext::findMoleculeGroups() {
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
()
&&
identical
;
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
()
&&
identical
;
i
++
)
{
if
(
mol
.
groups
[
i
].
size
()
!=
mol2
.
groups
[
i
].
size
())
if
(
mol
.
groups
[
i
].
size
()
!=
mol2
.
groups
[
i
].
size
())
identical
=
false
;
identical
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
mol
.
groups
[
i
].
size
()
&&
identical
;
k
++
)
for
(
int
k
=
0
;
k
<
(
int
)
mol
.
groups
[
i
].
size
()
&&
identical
;
k
++
)
{
if
(
!
forces
[
i
]
->
areGroupsIdentical
(
mol
.
groups
[
i
][
k
],
mol2
.
groups
[
i
][
k
]))
if
(
!
forces
[
i
]
->
areGroupsIdentical
(
mol
.
groups
[
i
][
k
],
mol2
.
groups
[
i
][
k
]))
identical
=
false
;
identical
=
false
;
vector
<
int
>
p1
,
p2
;
forces
[
i
]
->
getParticlesInGroup
(
mol
.
groups
[
i
][
k
],
p1
);
forces
[
i
]
->
getParticlesInGroup
(
mol2
.
groups
[
i
][
k
],
p2
);
for
(
int
m
=
0
;
m
<
p1
.
size
();
m
++
)
if
(
p1
[
m
]
!=
p2
[
m
]
-
atomOffset
)
identical
=
false
;
}
}
}
if
(
identical
)
{
if
(
identical
)
{
moleculeInstances
[
j
].
push_back
(
molIndex
);
moleculeInstances
[
j
].
push_back
(
molIndex
);
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
18295108
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsParams
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
),
hasOverlappingVsites
(
false
)
{
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
),
hasOverlappingVsites
(
false
)
{
// Create workspace arrays.
// Create workspace arrays.
...
@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vector
<
mm_double4
>
vsite3AvgWeightVec
;
vector
<
mm_double4
>
vsite3AvgWeightVec
;
vector
<
mm_int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
mm_int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
mm_double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
mm_double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
mm_int4
>
vsiteLocalCoordsAtomVec
;
vector
<
cl_int
>
vsiteLocalCoordsIndexVec
;
vector
<
cl_double
>
vsiteLocalCoordsParamVec
;
vector
<
cl_int
>
vsiteLocalCoordsAtomVec
;
vector
<
cl_int
>
vsiteLocalCoordsStartVec
;
vector
<
cl_double
>
vsiteLocalCoordsWeightVec
;
vector
<
mm_double4
>
vsiteLocalCoordsPosVec
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
...
@@ -523,65 +527,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -523,65 +527,73 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeightVec
.
push_back
(
mm_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
vsiteOutOfPlaneWeightVec
.
push_back
(
mm_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
}
}
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A
n out of plane
site.
// A
local coordinates
site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
vsiteLocalCoordsAtomVec
.
push_back
(
mm_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
int
numParticles
=
site
.
getNumParticles
();
Vec3
origin
=
site
.
getOriginWeights
();
vector
<
double
>
origin
,
x
,
y
;
Vec3
x
=
site
.
getXWeights
();
site
.
getOriginWeights
(
origin
);
Vec3
y
=
site
.
getYWeights
();
site
.
getXWeights
(
x
);
site
.
getYWeights
(
y
);
vsiteLocalCoordsIndexVec
.
push_back
(
i
);
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
vsiteLocalCoordsAtomVec
.
push_back
(
site
.
getParticle
(
j
));
vsiteLocalCoordsWeightVec
.
push_back
(
origin
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
x
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
y
[
j
]);
}
Vec3
pos
=
site
.
getLocalPosition
();
Vec3
pos
=
site
.
getLocalPosition
();
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
0
]);
vsiteLocalCoordsPosVec
.
push_back
(
mm_double4
(
pos
[
0
],
pos
[
1
],
pos
[
2
],
0.0
));
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
2
]);
}
}
}
}
}
}
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Atom
Vec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Pos
Vec
.
size
();
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
+
numLocalCoords
;
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
+
numLocalCoords
;
vsite2AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite2AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsite3AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteOutOfPlaneAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
numLocalCoords
),
"vsiteLocalCoordinatesAtoms"
);
vsiteLocalCoordsIndex
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsParams
->
upload
(
vsiteLocalCoordsParamVec
);
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
}
}
}
else
{
else
{
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
OpenCLArray
::
create
<
float
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
if
(
num2Avg
>
0
)
{
vector
<
mm_float2
>
floatWeights
(
num2Avg
);
vector
<
mm_float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
...
@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
}
}
if
(
numLocalCoords
>
0
)
{
if
(
numLocalCoords
>
0
)
{
vector
<
cl_float
>
floatParams
(
vsiteLocalCoordsParamVec
.
size
());
vector
<
cl_float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsParamVec
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatParams
[
i
]
=
(
cl_float
)
vsiteLocalCoordsParamVec
[
i
];
floatWeights
[
i
]
=
(
cl_float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsParams
->
upload
(
floatParams
);
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vector
<
mm_float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
mm_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
}
}
}
}
...
@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsParams
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
index
=
0
;
index
=
0
;
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
...
@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
...
@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsParams
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
if
(
hasOverlappingVsites
&&
context
.
getSupports64BitGlobalAtomics
())
if
(
hasOverlappingVsites
&&
context
.
getSupports64BitGlobalAtomics
())
vsiteAddForcesKernel
=
cl
::
Kernel
(
vsiteProgram
,
"addDistributedForces"
);
vsiteAddForcesKernel
=
cl
::
Kernel
(
vsiteProgram
,
"addDistributedForces"
);
}
}
...
@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
...
@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete
vsiteOutOfPlaneAtoms
;
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsParams
!=
NULL
)
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsParams
;
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
}
void
OpenCLIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
void
OpenCLIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
...
...
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
18295108
...
@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
...
@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
return
new
OpenCLCalcCustomCVForceKernel
(
name
,
platform
,
cl
);
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
OpenCLCalcCustomManyParticleForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomManyParticleForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
18295108
...
@@ -48,6 +48,7 @@
...
@@ -48,6 +48,7 @@
#include "lepton/Operation.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <algorithm>
...
@@ -138,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
...
@@ -138,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cl.reduceForces();
cl.reduceForces();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
if (includeEnergy)
OpenCLArray& energyArray = cl.getEnergyBuffer();
sum += cl.reduceEnergy();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
if (!cl.getForcesValid())
if (!cl.getForcesValid())
valid = false;
valid = false;
return sum;
return sum;
...
@@ -1780,7 +1768,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1780,7 +1768,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
try {
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha
, false
);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
pmeio = new PmeIO(cl, addForcesKernel);
pmeio = new PmeIO(cl, addForcesKernel);
...
@@ -4734,7 +4722,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4734,7 +4722,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
const vector<int>& atoms = distance.second;
const vector<int>& atoms = distance.second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) {
if (computedDeltas.count(deltaName) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName);
computedDeltas.insert(deltaName);
}
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n");
...
@@ -4749,11 +4737,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4749,11 +4737,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) {
if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName1);
computedDeltas.insert(deltaName1);
}
}
if (computedDeltas.count(deltaName2) == 0) {
if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName2);
computedDeltas.insert(deltaName2);
}
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
...
@@ -4771,15 +4759,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4771,15 +4759,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) {
if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName1);
computedDeltas.insert(deltaName1);
}
}
if (computedDeltas.count(deltaName2) == 0) {
if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName2);
computedDeltas.insert(deltaName2);
}
}
if (computedDeltas.count(deltaName3) == 0) {
if (computedDeltas.count(deltaName3) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+"
, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);\n");
computedDeltas.insert(deltaName3);
computedDeltas.insert(deltaName3);
}
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
...
@@ -4798,12 +4786,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
...
@@ -4798,12 +4786,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cl.intToString(i+1)+" = donor"+buffer.getName()+"[
i
ndex];\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cl.intToString(i+1)+" = donor"+buffer.getName()+"[
donorI
ndex];\n");
}
}
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cl.intToString(i+1)+" = acceptor"+buffer.getName()+"[
i
ndex];\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cl.intToString(i+1)+" = acceptor"+buffer.getName()+"[
acceptorI
ndex];\n");
}
}
// Now evaluate the expressions.
// Now evaluate the expressions.
...
@@ -6873,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
...
@@ -6873,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
exclusionStartIndex->upload(startIndexVec);
exclusionStartIndex->upload(startIndexVec);
}
}
/**
 * Reorder listener that maintains a device array holding the inverse of a
 * context's atom index array: for every position i in the atom index array,
 * element atomIndex[i] of the device array is set to i.
 */
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
    /**
     * @param cl            the context whose atom index array is inverted
     * @param invAtomOrder  the device array that receives the inverse ordering
     */
    ReorderListener(OpenCLContext& cl, OpenCLArray& invAtomOrder) : cl(cl), invAtomOrder(invAtomOrder) {
    }
    /**
     * Rebuild the inverse ordering on the host and upload it to the device array.
     */
    void execute() {
        // Sized to the padded atom count; slots never referenced by the atom
        // index array keep their zero initial value.
        vector<cl_int> inverse(cl.getPaddedNumAtoms());
        const vector<int>& atomIndex = cl.getAtomIndex();
        int position = 0;
        for (int atom : atomIndex)
            inverse[atom] = position++;
        invAtomOrder.upload(inverse);
    }
private:
    OpenCLContext& cl;
    OpenCLArray& invAtomOrder;
};
/**
 * Release the device arrays owned by this kernel: every entry of cvForces
 * plus the two inverse atom order arrays.
 */
OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
    for (size_t i = 0; i < cvForces.size(); i++)
        delete cvForces[i];
    // Deleting a null pointer is a no-op, so no explicit checks are required.
    delete invAtomOrder;
    delete innerInvAtomOrder;
}
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1));
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
for (int i = 0; i < numCVs; i++) {
string name = force.getCollectiveVariableName(i);
variableNames.push_back(name);
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
cl.addEnergyParameterDerivative(name);
}
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
for (auto& param : cl2.getEnergyParamDerivNames())
cl.addEnergyParameterDerivative(param);
// Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
for (int i = 0; i < numCVs; i++)
cvForces.push_back(new OpenCLArray(cl, cl.getNumAtoms(), 4*elementSize, "cvForce"));
invAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", __global real4* restrict force" << i << ", real dEdV" << i;
add << "f += force" << i << "[i]*dEdV" << i << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCVForce, replacements));
copyStateKernel = cl::Kernel(program, "copyState");
copyForcesKernel = cl::Kernel(program, "copyForces");
addForcesKernel = cl::Kernel(program, "addForces");
}
/**
 * Evaluate the CustomCVForce: compute every collective variable in the inner
 * context, evaluate the energy expression, add the chain-rule forces to the
 * outer context and accumulate the energy parameter derivatives.
 *
 * @param context        the outer context whose state is being evaluated
 * @param innerContext   the context used to evaluate the collective variables
 * @param includeForces  not consulted by this implementation; forces are always added
 * @param includeEnergy  not consulted by this implementation; the energy is always computed
 * @return the value of the energy expression
 */
double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
    copyState(context, innerContext);
    int numCVs = variableNames.size();
    int numAtoms = cl.getNumAtoms();

    // Evaluate each collective variable as force group (1<<i) of the inner
    // context, keeping its value, its forces and its parameter derivatives.

    vector<double> cvValues;
    vector<map<string, double> > cvDerivs(numCVs);
    for (int i = 0; i < numCVs; i++) {
        cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
        copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]->getDeviceBuffer());
        cl.executeKernel(copyForcesKernel, numAtoms);
        innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
    }

    // Compute the energy and forces.

    map<string, double> variables;
    for (auto& name : globalParameterNames)
        variables[name] = context.getParameter(name);
    for (int i = 0; i < numCVs; i++)
        variables[variableNames[i]] = cvValues[i];
    double energy = energyExpression.evaluate(variables);

    // Evaluate dE/dCV once per variable; the same value scales both the forces
    // and the parameter derivatives below.

    vector<double> dEdV(numCVs);
    for (int i = 0; i < numCVs; i++) {
        dEdV[i] = variableDerivExpressions[i].evaluate(variables);
        // Argument 2*i+3 is the dEdV slot that follows force buffer 2*i+2.
        if (cl.getUseDoublePrecision())
            addForcesKernel.setArg<cl_double>(2*i+3, dEdV[i]);
        else
            addForcesKernel.setArg<cl_float>(2*i+3, (cl_float) dEdV[i]);
    }
    cl.executeKernel(addForcesKernel, numAtoms);

    // Compute the energy parameter derivatives: the explicit dependence of the
    // energy expression plus the chain-rule contribution through each variable.

    map<string, double>& energyParamDerivs = cl.getEnergyParamDerivWorkspace();
    for (int i = 0; i < (int) paramDerivExpressions.size(); i++)
        energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
    for (int i = 0; i < numCVs; i++) {
        for (auto& deriv : cvDerivs[i])
            energyParamDerivs[deriv.first] += dEdV[i]*deriv.second;
    }
    return energy;
}
/**
 * Bring the inner context in line with the outer context before the
 * collective variables are evaluated.
 *
 * On the first call this registers the reorder listeners for both contexts and
 * binds the fixed arguments of the copyState, copyForces and addForces
 * kernels. On every call it runs the copyState kernel and then copies the
 * periodic box vectors, the time and the values of the inner context's
 * parameters from the outer context.
 *
 * @param context       the outer context supplying the state
 * @param innerContext  the context that receives the state
 */
void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
    int numAtoms = cl.getNumAtoms();
    OpenCLPlatform::PlatformData* innerData = reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData());
    OpenCLContext& cl2 = *innerData->contexts[0];
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;

        // Register one listener per context and run each once so both inverse
        // order arrays are populated immediately.

        ReorderListener* outerListener = new ReorderListener(cl, *invAtomOrder);
        ReorderListener* innerListener = new ReorderListener(cl2, *innerInvAtomOrder);
        cl.addReorderListener(outerListener);
        cl2.addReorderListener(innerListener);
        outerListener->execute();
        innerListener->execute();

        // copyState kernel: slots 0-3 take outer-context buffers, slots 4-7
        // take inner-context buffers, slot 8 is the atom count.

        copyStateKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
        copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder->getDeviceBuffer());
        copyStateKernel.setArg<cl_int>(8, numAtoms);
        if (!cl.getUseMixedPrecision()) {
            // No position correction buffers are passed outside mixed precision.
            copyStateKernel.setArg<void*>(1, NULL);
            copyStateKernel.setArg<void*>(5, NULL);
        }
        else {
            copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
            copyStateKernel.setArg<cl::Buffer>(5, cl2.getPosqCorrection().getDeviceBuffer());
        }

        // copyForces kernel: slot 0 is bound separately for each collective
        // variable in execute().

        copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder->getDeviceBuffer());
        copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
        copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
        copyForcesKernel.setArg<cl_int>(4, numAtoms);

        // addForces kernel: force buffers occupy the even slots from 2 upward;
        // the odd slots in between are bound in execute().

        addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
        addForcesKernel.setArg<cl_int>(1, numAtoms);
        for (int cv = 0; cv < (int) cvForces.size(); cv++)
            addForcesKernel.setArg<cl::Buffer>(2*cv+2, cvForces[cv]->getDeviceBuffer());
    }
    cl.executeKernel(copyStateKernel, numAtoms);

    // Mirror the box vectors, the time and the parameter values.

    Vec3 boxA, boxB, boxC;
    context.getPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setTime(context.getTime());
    // Iterate over a copy of the parameter map while setParameter() is called
    // on the inner context.
    map<string, double> innerParameters = innerContext.getParameters();
    for (map<string, double>::const_iterator param = innerParameters.begin(); param != innerParameters.end(); ++param)
        innerContext.setParameter(param->first, context.getParameter(param->first));
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
}
}
...
@@ -7408,6 +7581,8 @@ OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
...
@@ -7408,6 +7581,8 @@ OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
delete perDofEnergyParamDerivs;
delete perDofEnergyParamDerivs;
if (perDofValues != NULL)
if (perDofValues != NULL)
delete perDofValues;
delete perDofValues;
for (auto function : tabulatedFunctions)
delete function;
for (auto& f : savedForces)
for (auto& f : savedForces)
delete f.second;
delete f.second;
}
}
...
@@ -7424,7 +7599,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
...
@@ -7424,7 +7599,8 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
}
}
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
const string suffixes[] = {".x", ".y", ".z"};
const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component];
string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions;
map<string, Lepton::ParsedExpression> expressions;
...
@@ -7457,8 +7633,6 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
...
@@ -7457,8 +7633,6 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++)
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames;
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
vector<pair<ExpressionTreeNode, string> > variableNodes;
vector<pair<ExpressionTreeNode, string> > variableNodes;
findExpressionsForDerivs(expr.getRootNode(), variableNodes);
findExpressionsForDerivs(expr.getRootNode(), variableNodes);
...
@@ -7489,16 +7663,41 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7489,16 +7663,41 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
stepTarget.resize(numSteps);
stepTarget.resize(numSteps);
merged.resize(numSteps, false);
merged.resize(numSteps, false);
modifiesParameters = false;
modifiesParameters = false;
sumWorkGroupSize = cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
if (sumWorkGroupSize > 512)
sumWorkGroupSize = 512;
map<string, string> defines;
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
defines["WORK_GROUP_SIZE"] = cl.intToString(sumWorkGroupSize);
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionNames;
vector<const TabulatedFunction*> functionList;
vector<string> tableTypes;
for (int i = 0; i < integrator.getNumTabulatedFunctions(); i++) {
functionList.push_back(&integrator.getTabulatedFunction(i));
string name = integrator.getTabulatedFunctionName(i);
string arrayName = "table"+cl.intToString(i);
functionNames.push_back(make_pair(name, arrayName));
functions[name] = createReferenceTabulatedFunction(integrator.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(integrator.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
if (width == 1)
tableTypes.push_back("float");
else
tableTypes.push_back("float"+cl.intToString(width));
}
// Record information about all the computation steps.
// Record information about all the computation steps.
vector<string> variable(numSteps);
vector<string> variable(numSteps);
vector<int> forceGroup;
vector<int> forceGroup;
vector<vector<Lepton::ParsedExpression> > expression;
vector<vector<Lepton::ParsedExpression> > expression;
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup);
CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup
, functions
);
for (int step = 0; step < numSteps; step++) {
for (int step = 0; step < numSteps; step++) {
string expr;
string expr;
integrator.getComputationStep(step, stepType[step], variable[step], expr);
integrator.getComputationStep(step, stepType[step], variable[step], expr);
...
@@ -7669,7 +7868,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7669,7 +7868,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
if (numUniform > 0)
if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++)
for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]
, functionList, functionNames
);
if (variable[j] == "x") {
if (variable[j] == "x") {
if (storePosAsDelta[j]) {
if (storePosAsDelta[j]) {
if (cl.getSupportsDoublePrecision())
if (cl.getSupportsDoublePrecision())
...
@@ -7704,6 +7903,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7704,6 +7903,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
string valueName = "perDofValues"+cl.intToString(i+1);
string valueName = "perDofValues"+cl.intToString(i+1);
args << ", __global " << buffer.getType() << "* restrict " << valueName;
args << ", __global " << buffer.getType() << "* restrict " << valueName;
}
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (loadPosAsDelta[step])
if (loadPosAsDelta[step])
defines["LOAD_POS_AS_DELTA"] = "1";
defines["LOAD_POS_AS_DELTA"] = "1";
...
@@ -7727,6 +7928,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7727,6 +7928,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers())
for (auto& buffer : perDofValues->getBuffers())
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
kernel.setArg<cl::Memory>(index++, buffer.getMemory());
for (auto array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) {
if (stepType[step] == CustomIntegrator::ComputeSum) {
// Create a second kernel for this step that sums the values.
// Create a second kernel for this step that sums the values.
...
@@ -7789,7 +7992,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7789,7 +7992,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
}
}
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
for (int i = 0; i < 3; i++)
for (int i = 0; i < 3; i++)
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "");
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", ""
, functionList, functionNames
);
map<string, string> replacements;
map<string, string> replacements;
replacements["COMPUTE_STEP"] = computeKE.str();
replacements["COMPUTE_STEP"] = computeKE.str();
stringstream args;
stringstream args;
...
@@ -7798,6 +8001,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7798,6 +8001,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
string valueName = "perDofValues"+cl.intToString(i+1);
string valueName = "perDofValues"+cl.intToString(i+1);
args << ", __global " << buffer.getType() << "* restrict " << valueName;
args << ", __global " << buffer.getType() << "* restrict " << valueName;
}
}
for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA");
defines.erase("LOAD_POS_AS_DELTA");
...
@@ -7821,6 +8026,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7821,6 +8026,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
for (auto array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array->getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f");
keNeedsForce = usesVariable(keExpression, "f");
// Create a second kernel to sum the values.
// Create a second kernel to sum the values.
...
@@ -7831,8 +8038,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
...
@@ -7831,8 +8038,13 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
}
// Make sure all values (variables, parameters, etc.) are up to date.
// Make sure all values (variables, parameters, etc.) are up to date.
if (!deviceValuesAreCurrent) {
if (!deviceValuesAreCurrent) {
...
@@ -7900,19 +8112,26 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -7900,19 +8112,26 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
int numAtoms = cl.getNumAtoms();
int numAtoms = cl.getNumAtoms();
int numSteps = integrator.getNumComputations();
int numSteps = integrator.getNumComputations();
if (!forcesAreValid)
savedEnergy.clear();
// Loop over computation steps in the integrator and execute them.
// Loop over computation steps in the integrator and execute them.
for (int step = 0; step < numSteps; ) {
for (int step = 0; step < numSteps; ) {
int nextStep = step+1;
int nextStep = step+1;
int forceGroups = forceGroupFlags[step];
int lastForceGroups = context.getLastForceGroups();
int lastForceGroups = context.getLastForceGroups();
if ((needsForces[step] || needsEnergy[step]) && (!forcesAreValid || lastForceGroups != forceGroupFlags[step])) {
bool haveForces = (!needsForces[step] || (forcesAreValid && lastForceGroups == forceGroups));
if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) {
bool haveEnergy = (!needsEnergy[step] || savedEnergy.find(forceGroups) != savedEnergy.end());
// The forces are still valid. We just need a different force group right now. Save the old
if (!haveForces || !haveEnergy) {
// forces in case we need them again.
if (forcesAreValid) {
if (savedForces.find(lastForceGroups) != savedForces.end() && validSavedForces.find(lastForceGroups) == validSavedForces.end()) {
cl.getForce().copyTo(*savedForces[lastForceGroups]);
// The forces are still valid. We just need a different force group right now. Save the old
validSavedForces.insert(lastForceGroups);
// forces in case we need them again.
cl.getForce().copyTo(*savedForces[lastForceGroups]);
validSavedForces.insert(lastForceGroups);
}
}
}
else
else
validSavedForces.clear();
validSavedForces.clear();
...
@@ -7922,14 +8141,16 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -7922,14 +8141,16 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
bool computeForce = (needsForces[step] || computeBothForceAndEnergy[step]);
bool computeForce = (needsForces[step] || computeBothForceAndEnergy[step]);
bool computeEnergy = (needsEnergy[step] || computeBothForceAndEnergy[step]);
bool computeEnergy = (needsEnergy[step] || computeBothForceAndEnergy[step]);
if (!computeEnergy && validSavedForces.find(forceGroup
Flags[step]
) != validSavedForces.end()) {
if (!computeEnergy && validSavedForces.find(forceGroup
s
) != validSavedForces.end()) {
// We can just restore the forces we saved earlier.
// We can just restore the forces we saved earlier.
savedForces[forceGroupFlags[step]]->copyTo(cl.getForce());
savedForces[forceGroups]->copyTo(cl.getForce());
context.getLastForceGroups() = forceGroups;
}
}
else {
else {
recordChangedParameters(context);
recordChangedParameters(context);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroupFlags[step]);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroups);
savedEnergy[forceGroups] = energy;
if (needsEnergyParamDerivs) {
if (needsEnergyParamDerivs) {
context.getEnergyParameterDerivatives(energyParamDerivs);
context.getEnergyParameterDerivatives(energyParamDerivs);
if (perDofEnergyParamDerivNames.size() > 0) {
if (perDofEnergyParamDerivNames.size() > 0) {
...
@@ -7948,6 +8169,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -7948,6 +8169,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
forcesAreValid = true;
forcesAreValid = true;
}
}
}
}
if (needsEnergy[step])
energy = savedEnergy[forceGroups];
if (needsGlobals[step] && !deviceGlobalsAreCurrent) {
if (needsGlobals[step] && !deviceGlobalsAreCurrent) {
// Upload the global values to the device.
// Upload the global values to the device.
...
@@ -7959,6 +8182,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -7959,6 +8182,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
globalValues->upload(globalValuesFloat);
globalValues->upload(globalValuesFloat);
}
}
}
}
bool stepInvalidatesForces = invalidatesForces[step];
if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
...
@@ -7989,7 +8213,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -7989,7 +8213,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
cl.executeKernel(randomKernel, numAtoms);
cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer);
cl.clearBuffer(*sumBuffer);
cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][0], numAtoms, 128);
cl.executeKernel(kernels[step][1],
OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlock
Size);
cl.executeKernel(kernels[step][1],
sumWorkGroupSize, sumWorkGroup
Size);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
double value;
summedValue->download(&value);
summedValue->download(&value);
...
@@ -8003,7 +8227,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -8003,7 +8227,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
}
else if (stepType[step] == CustomIntegrator::UpdateContextState) {
else if (stepType[step] == CustomIntegrator::UpdateContextState) {
recordChangedParameters(context);
recordChangedParameters(context);
context.updateContextState();
stepInvalidatesForces =
context.updateContextState();
}
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
if (hasAnyConstraints) {
if (hasAnyConstraints) {
...
@@ -8027,8 +8251,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
...
@@ -8027,8 +8251,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (blockEnd[step] != -1)
if (blockEnd[step] != -1)
nextStep = blockEnd[step]; // Return to the start of a while block.
nextStep = blockEnd[step]; // Return to the start of a while block.
}
}
if (
i
nvalidatesForces
[step])
if (
stepI
nvalidatesForces
) {
forcesAreValid = false;
forcesAreValid = false;
savedEnergy.clear();
}
step = nextStep;
step = nextStep;
}
}
recordChangedParameters(context);
recordChangedParameters(context);
...
@@ -8089,7 +8315,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
...
@@ -8089,7 +8315,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(9, 0);
kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel,
OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlock
Size);
cl.executeKernel(sumKineticEnergyKernel,
sumWorkGroupSize, sumWorkGroup
Size);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke;
double ke;
summedValue->download(&ke);
summedValue->download(&ke);
...
@@ -8246,6 +8472,8 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
...
@@ -8246,6 +8472,8 @@ void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
if (savedPositions != NULL)
if (savedPositions != NULL)
delete savedPositions;
delete savedPositions;
if (savedForces != NULL)
delete savedForces;
if (moleculeAtoms != NULL)
if (moleculeAtoms != NULL)
delete moleculeAtoms;
delete moleculeAtoms;
if (moleculeStartIndex != NULL)
if (moleculeStartIndex != NULL)
...
@@ -8254,6 +8482,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
...
@@ -8254,6 +8482,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedPositions = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces = new OpenCLArray(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions");
kernel = cl::Kernel(program, "scalePositions");
}
}
...
@@ -8289,6 +8518,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
...
@@ -8289,6 +8518,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
}
}
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces->getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ);
kernel.setArg<cl_float>(2, (cl_float) scaleZ);
...
@@ -8302,6 +8532,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
...
@@ -8302,6 +8532,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedPositions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedForces->getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
}
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
18295108
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
6
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() {
...
@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCVForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
...
@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
,
threads
));
context
.
setPlatformData
(
new
PlatformData
(
context
.
getSystem
(),
platformPropValue
,
devicePropValue
,
precisionPropValue
,
cpuPmePropValue
,
pmeStreamPropValue
,
threads
,
NULL
));
}
/**
 * Create the platform data for a context that is linked to an existing one.
 *
 * The property values (OpenCLPlatformIndex, OpenCLDeviceIndex, OpenCLPrecision,
 * OpenCLUseCpuPme, OpenCLDisablePmeStream) are read from the platform of the
 * original context, using the original context's owner, and the thread count
 * is taken from the original context's PlatformData.  A new PlatformData built
 * from those values, plus a pointer to the original context, is then attached
 * to the new context.
 *
 * @param context          the newly created context that receives the PlatformData
 * @param originalContext  the existing context whose settings are reused
 */
void OpenCLPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
    Platform& sourcePlatform = originalContext.getPlatform();

    // Look up every property on the original context so the linked context uses the same settings.
    const string platformIndexValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLPlatformIndex());
    const string deviceIndexValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLDeviceIndex());
    const string precisionValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLPrecision());
    const string useCpuPmeValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLUseCpuPme());
    const string disablePmeStreamValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLDisablePmeStream());

    // The thread count is not looked up as a property; it comes from the original PlatformData.
    PlatformData* sourceData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
    const int numThreads = sourceData->threads.getNumThreads();

    PlatformData* linkedData = new PlatformData(context.getSystem(), platformIndexValue, deviceIndexValue,
            precisionValue, useCpuPmeValue, disablePmeStreamValue, numThreads, &originalContext);
    context.setPlatformData(linkedData);
}
}
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
void
OpenCLPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
...
@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
}
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
OpenCLPlatform
::
PlatformData
::
PlatformData
(
const
System
&
system
,
const
string
&
platformPropValue
,
const
string
&
deviceIndexProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
,
ContextImpl
*
originalContext
)
:
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
int
platformIndex
=
-
1
;
int
platformIndex
=
-
1
;
if
(
platformPropValue
.
length
()
>
0
)
if
(
platformPropValue
.
length
()
>
0
)
...
@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
...
@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos
=
nextPos
+
1
;
searchPos
=
nextPos
+
1
;
}
}
devices
.
push_back
(
deviceIndexProperty
.
substr
(
searchPos
));
devices
.
push_back
(
deviceIndexProperty
.
substr
(
searchPos
));
PlatformData
*
originalData
=
NULL
;
if
(
originalContext
!=
NULL
)
originalData
=
reinterpret_cast
<
PlatformData
*>
(
originalContext
->
getPlatformData
());
try
{
try
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
if
(
devices
[
i
].
length
()
>
0
)
{
int
deviceIndex
;
int
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
stringstream
(
devices
[
i
])
>>
deviceIndex
;
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
deviceIndex
,
precisionProperty
,
*
this
));
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
deviceIndex
,
precisionProperty
,
*
this
,
(
originalData
==
NULL
?
NULL
:
originalData
->
contexts
[
i
])
));
}
}
}
}
if
(
contexts
.
size
()
==
0
)
if
(
contexts
.
size
()
==
0
)
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
-
1
,
precisionProperty
,
*
this
));
contexts
.
push_back
(
new
OpenCLContext
(
system
,
platformIndex
,
-
1
,
precisionProperty
,
*
this
,
(
originalData
==
NULL
?
NULL
:
originalData
->
contexts
[
0
])
));
}
}
catch
(...)
{
catch
(...)
{
// If an exception was thrown, do our best to clean up memory.
// If an exception was thrown, do our best to clean up memory.
...
...
platforms/opencl/src/kernels/customCVForce.cl
0 → 100644
View file @
18295108
/**
 * Copy the positions and velocities to the inner context.
 *
 * Element i of the outer arrays is written to the inner arrays at the slot
 * innerInvAtomOrder[atomOrder[i]].  When USE_MIXED_PRECISION is defined, the
 * position correction values are copied to the same slot as well.
 */
__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm,
        __global int* restrict atomOrder, __global real4* innerPosq, __global real4* innerPosqCorrection,
        __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder, int numAtoms) {
    int atom = get_global_id(0);
    while (atom < numAtoms) {
        // Translate the outer index into the matching slot of the inner arrays.
        const int innerSlot = innerInvAtomOrder[atomOrder[atom]];
        innerPosq[innerSlot] = posq[atom];
        innerVelm[innerSlot] = velm[atom];
#ifdef USE_MIXED_PRECISION
        innerPosqCorrection[innerSlot] = posqCorrection[atom];
#endif
        atom += get_global_size(0);
    }
}
/**
 * Copy the forces back to the main context.
 *
 * Element i of innerForces is written to forces at the slot
 * invAtomOrder[innerAtomOrder[i]].
 */
__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder,
        __global real4* innerForces, __global int* restrict innerAtomOrder, int numAtoms) {
    int innerAtom = get_global_id(0);
    while (innerAtom < numAtoms) {
        // Translate the inner index into the matching slot of the main force array.
        const int outerSlot = invAtomOrder[innerAtomOrder[innerAtom]];
        forces[outerSlot] = innerForces[innerAtom];
        innerAtom += get_global_size(0);
    }
}
/**
 * Add all the forces from the CVs.
 *
 * For every index i below numAtoms, forces[i] is loaded into the local f, the
 * ADD_FORCES macro is expanded, and f is written back to forces[i].
 *
 * PARAMETER_ARGUMENTS (extra kernel arguments) and ADD_FORCES (the loop body)
 * are macros that are not defined in the visible source; they have to be
 * supplied when the kernel is compiled.
 * NOTE(review): ADD_FORCES presumably updates f using the loop index i, so the
 * names of those two locals should not be changed -- confirm against the host code.
 */
__kernel
void
addForces
(
__global
real4*
forces,
int
numAtoms
PARAMETER_ARGUMENTS
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < numAtoms; i += get_global_size(0)) {
real4
f
=
forces[i]
;
ADD_FORCES
forces[i]
=
f
;
}
}
\ No newline at end of file
platforms/opencl/src/kernels/customHbondForce.cl
View file @
18295108
/**
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*
Compute
the
difference
between
two
vectors,
optionally
taking
periodic
boundary
conditions
into
account
*/
real4
delta
(
real4
vec1,
real4
vec2
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
/**
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
real4
delta
Periodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
result
)
APPLY_PERIODIC_TO_DELTA
(
result
)
...
@@ -81,6 +72,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -81,6 +72,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += get_local_size(0)) {
for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += get_local_size(0)) {
// Load the next block of acceptors into local memory.
// Load the next block of acceptors into local memory.
barrier(CLK_LOCAL_MEM_FENCE);
int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart);
int blockSize = min((int) get_local_size(0), NUM_ACCEPTORS-acceptorStart);
if (get_local_id(0) < blockSize) {
if (get_local_id(0) < blockSize) {
int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)];
int4 atoms2 = acceptorAtoms[acceptorStart+get_local_id(0)];
...
@@ -91,8 +83,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -91,8 +83,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (donorIndex < NUM_DONORS) {
if (donorIndex < NUM_DONORS) {
for (int index = 0; index < blockSize; index++) {
for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int acceptorIndex = acceptorStart+index;
int acceptorIndex = acceptorStart+index;
#ifdef USE_EXCLUSIONS
if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
continue;
continue;
#endif
#endif
...
@@ -101,7 +93,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -101,7 +93,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index];
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = delta
Periodic
(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
#endif
...
@@ -169,6 +161,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
...
@@ -169,6 +161,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += get_local_size(0)) {
for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += get_local_size(0)) {
// Load the next block of donors into local memory.
// Load the next block of donors into local memory.
barrier(CLK_LOCAL_MEM_FENCE);
int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart);
int blockSize = min((int) get_local_size(0), NUM_DONORS-donorStart);
if (get_local_id(0) < blockSize) {
if (get_local_id(0) < blockSize) {
int4 atoms2 = donorAtoms[donorStart+get_local_id(0)];
int4 atoms2 = donorAtoms[donorStart+get_local_id(0)];
...
@@ -179,8 +172,8 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
...
@@ -179,8 +172,8 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (acceptorIndex < NUM_ACCEPTORS) {
if (acceptorIndex < NUM_ACCEPTORS) {
for (int index = 0; index < blockSize; index++) {
for (int index = 0; index < blockSize; index++) {
#ifdef USE_EXCLUSIONS
int donorIndex = donorStart+index;
int donorIndex = donorStart+index;
#ifdef USE_EXCLUSIONS
if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z |
|
donorIndex
==
exclusionIndices.w
)
if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z |
|
donorIndex
==
exclusionIndices.w
)
continue
;
continue
;
#
endif
#
endif
...
@@ -189,7 +182,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
...
@@ -189,7 +182,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4
d1
=
posBuffer[3*index]
;
real4
d1
=
posBuffer[3*index]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
deltaD1A1
=
delta
Periodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
real4
deltaD1A1
=
delta
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
18295108
...
@@ -56,6 +56,11 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -56,6 +56,11 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
*
Determine
whether
a
particular
interaction
is
in
the
list
of
exclusions.
*
Determine
whether
a
particular
interaction
is
in
the
list
of
exclusions.
*/
*/
inline
bool
isInteractionExcluded
(
int
atom1,
int
atom2,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
)
{
inline
bool
isInteractionExcluded
(
int
atom1,
int
atom2,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
)
{
if
(
atom1
>
atom2
)
{
int
temp
=
atom1
;
atom1
=
atom2
;
atom2
=
temp
;
}
int
first
=
exclusionStartIndex[atom1]
;
int
first
=
exclusionStartIndex[atom1]
;
int
last
=
exclusionStartIndex[atom1+1]
;
int
last
=
exclusionStartIndex[atom1+1]
;
for
(
int
i
=
last-1
; i >= first; i--) {
for
(
int
i
=
last-1
; i >= first; i--) {
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
18295108
...
@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
...
@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
}
}
#
endif
#
endif
/**
 * Sum the energy buffer.
 *
 * Each work-item adds up a strided slice of energyBuffer (starting at its local
 * id and stepping by the local size) and stores that partial sum in tempBuffer.
 * The partial sums are then combined pairwise in local memory, and the
 * work-item with local id 0 writes the total to *result.
 *
 * energyBuffer   the values to add up
 * result         receives the total
 * bufferSize     the number of elements of energyBuffer to read
 * workGroupSize  the number of tempBuffer entries combined by the pairwise step
 *                (presumably the local size used for the launch -- confirm against the host code)
 * tempBuffer     local scratch space, indexed by local id
 */
__kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global mixed* restrict result,
        int bufferSize, int workGroupSize, __local mixed* tempBuffer) {
    const unsigned int localId = get_local_id(0);

    // Accumulate this work-item's share of the buffer.
    mixed partial = 0;
    unsigned int element = localId;
    while (element < bufferSize) {
        partial += energyBuffer[element];
        element += get_local_size(0);
    }
    tempBuffer[localId] = partial;

    // Combine the partial sums, doubling the distance between partners on each pass.
    for (int stride = 1; stride < workGroupSize; stride *= 2) {
        // Make the values written in the previous pass visible before reading them.
        barrier(CLK_LOCAL_MEM_FENCE);
        const bool isReceiver = (localId%(stride*2) == 0);
        if (isReceiver && localId+stride < workGroupSize)
            tempBuffer[localId] += tempBuffer[localId+stride];
    }

    if (localId == 0)
        *result = tempBuffer[0];
}
/**
/**
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
*/
*/
...
...
platforms/opencl/src/kernels/virtualSites.cl
View file @
18295108
...
@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
...
@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsParams
)
{
__global
const
int*
restrict
localCoordsIndex,
__global
const
int*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsWeights,
__global
const
real4*
restrict
localCoordsPos,
__global
const
int*
restrict
localCoordsStartIndex
)
{
#
ifndef
USE_MIXED_PRECISION
#
ifndef
USE_MIXED_PRECISION
__global
real4*
posqCorrection
=
0
;
__global
real4*
posqCorrection
=
0
;
#
endif
#
endif
...
@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
...
@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
//
Local
coordinates
sites.
//
Local
coordinates
sites.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4
atoms
=
localCoordsAtoms[index]
;
int
siteAtomIndex
=
localCoordsIndex[index]
;
__global
const
real*
params
=
&localCoordsParams[12*index]
;
int
start
=
localCoordsStartIndex[index]
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
atoms.x
)
;
int
end
=
localCoordsStartIndex[index+1]
;
mixed4
pos1_4
=
loadPos
(
posq,
posqCorrection,
atoms.y
)
;
mixed3
origin
=
0
,
xdir
=
0
,
ydir
=
0
;
mixed4
pos2_4
=
loadPos
(
posq,
posqCorrection,
atoms.z
)
;
for
(
int
j
=
start
; j < end; j++) {
mixed4
pos3_4
=
loadPos
(
posq,
posqCorrection,
atoms.w
)
;
mixed3
pos
=
loadPos
(
posq,
posqCorrection,
localCoordsAtoms[j]
)
.
xyz
;
mixed4
pos1
=
(
mixed4
)
(
pos1_4.x,
pos1_4.y,
pos1_4.z,
0
)
;
origin
+=
pos*localCoordsWeights[3*j]
;
mixed4
pos2
=
(
mixed4
)
(
pos2_4.x,
pos2_4.y,
pos2_4.z,
0
)
;
xdir
+=
pos*localCoordsWeights[3*j+1]
;
mixed4
pos3
=
(
mixed4
)
(
pos3_4.x,
pos3_4.y,
pos3_4.z,
0
)
;
ydir
+=
pos*localCoordsWeights[3*j+2]
;
mixed4
originWeights
=
(
mixed4
)
(
params[0],
params[1],
params[2],
0
)
;
}
mixed4
xWeights
=
(
mixed4
)
(
params[3],
params[4],
params[5],
0
)
;
mixed3
zdir
=
cross
(
xdir,
ydir
)
;
mixed4
yWeights
=
(
mixed4
)
(
params[6],
params[7],
params[8],
0
)
;
mixed
normXdir
=
sqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed4
localPosition
=
(
mixed4
)
(
params[9],
params[10],
params[11],
0
)
;
mixed
normZdir
=
sqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed4
origin
=
pos1*originWeights.x
+
pos2*originWeights.y
+
pos3*originWeights.z
;
mixed
invNormXdir
=
(
normXdir
>
0
?
1/normXdir
:
0
)
;
mixed4
xdir
=
pos1*xWeights.x
+
pos2*xWeights.y
+
pos3*xWeights.z
;
mixed
invNormZdir
=
(
normZdir
>
0
?
1/normZdir
:
0
)
;
mixed4
ydir
=
pos1*yWeights.x
+
pos2*yWeights.y
+
pos3*yWeights.z
;
xdir
*=
invNormXdir
;
mixed4
zdir
=
cross
(
xdir,
ydir
)
;
zdir
*=
invNormZdir
;
xdir
*=
rsqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
zdir
*=
rsqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
ydir
=
cross
(
zdir,
xdir
)
;
ydir
=
cross
(
zdir,
xdir
)
;
mixed3
localPosition
=
convert_mixed4
(
localCoordsPos[index]
)
.
xyz
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
siteAtomIndex
)
;
pos.x
=
origin.x
+
xdir.x*localPosition.x
+
ydir.x*localPosition.y
+
zdir.x*localPosition.z
;
pos.x
=
origin.x
+
xdir.x*localPosition.x
+
ydir.x*localPosition.y
+
zdir.x*localPosition.z
;
pos.y
=
origin.y
+
xdir.y*localPosition.x
+
ydir.y*localPosition.y
+
zdir.y*localPosition.z
;
pos.y
=
origin.y
+
xdir.y*localPosition.x
+
ydir.y*localPosition.y
+
zdir.y*localPosition.z
;
pos.z
=
origin.z
+
xdir.z*localPosition.x
+
ydir.z*localPosition.y
+
zdir.z*localPosition.z
;
pos.z
=
origin.z
+
xdir.z*localPosition.x
+
ydir.z*localPosition.y
+
zdir.z*localPosition.z
;
storePos
(
posq,
posqCorrection,
atoms.
x,
pos
)
;
storePos
(
posq,
posqCorrection,
siteAtomInde
x,
pos
)
;
}
}
}
}
...
@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
...
@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsParams
)
{
__global
const
int*
restrict
localCoordsIndex,
__global
const
int*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsWeights,
__global
const
real4*
restrict
localCoordsPos,
__global
const
int*
restrict
localCoordsStartIndex
)
{
#
ifndef
USE_MIXED_PRECISION
#
ifndef
USE_MIXED_PRECISION
__global
real4*
posqCorrection
=
0
;
__global
real4*
posqCorrection
=
0
;
#
endif
#
endif
...
@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
...
@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
//
Local
coordinates
sites.
//
Local
coordinates
sites.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4
atoms
=
localCoordsAtoms[index]
;
int
siteAtomIndex
=
localCoordsIndex[index]
;
__global
const
real*
params
=
&localCoordsParams[12*index]
;
int
start
=
localCoordsStartIndex[index]
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
atoms.x
)
;
int
end
=
localCoordsStartIndex[index+1]
;
mixed4
pos1_4
=
loadPos
(
posq,
posqCorrection,
atoms.y
)
;
mixed3
origin
=
0
,
xdir
=
0
,
ydir
=
0
;
mixed4
pos2_4
=
loadPos
(
posq,
posqCorrection,
atoms.z
)
;
for
(
int
j
=
start
; j < end; j++) {
mixed4
pos3_4
=
loadPos
(
posq,
posqCorrection,
atoms.w
)
;
mixed3
pos
=
loadPos
(
posq,
posqCorrection,
localCoordsAtoms[j]
)
.
xyz
;
mixed4
pos1
=
(
mixed4
)
(
pos1_4.x,
pos1_4.y,
pos1_4.z,
0
)
;
origin
+=
pos*localCoordsWeights[3*j]
;
mixed4
pos2
=
(
mixed4
)
(
pos2_4.x,
pos2_4.y,
pos2_4.z,
0
)
;
xdir
+=
pos*localCoordsWeights[3*j+1]
;
mixed4
pos3
=
(
mixed4
)
(
pos3_4.x,
pos3_4.y,
pos3_4.z,
0
)
;
ydir
+=
pos*localCoordsWeights[3*j+2]
;
mixed4
originWeights
=
(
mixed4
)
(
params[0],
params[1],
params[2],
0
)
;
}
mixed4
wx
=
(
mixed4
)
(
params[3],
params[4],
params[5],
0
)
;
mixed3
zdir
=
cross
(
xdir,
ydir
)
;
mixed4
wy
=
(
mixed4
)
(
params[6],
params[7],
params[8],
0
)
;
mixed
normXdir
=
sqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed4
localPosition
=
(
mixed4
)
(
params[9],
params[10],
params[11],
0
)
;
mixed
normZdir
=
sqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed4
origin
=
pos1*originWeights.x
+
pos2*originWeights.y
+
pos3*originWeights.z
;
mixed
invNormXdir
=
(
normXdir
>
0
?
1/normXdir
:
0
)
;
mixed4
xdir
=
pos1*wx.x
+
pos2*wx.y
+
pos3*wx.z
;
mixed
invNormZdir
=
(
normZdir
>
0
?
1/normZdir
:
0
)
;
mixed4
ydir
=
pos1*wy.x
+
pos2*wy.y
+
pos3*wy.z
;
mixed3
dx
=
xdir*invNormXdir
;
mixed4
zdir
=
cross
(
xdir,
ydir
)
;
mixed3
dz
=
zdir*invNormZdir
;
mixed
invNormXdir
=
rsqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed3
dy
=
cross
(
dz,
dx
)
;
mixed
invNormZdir
=
rsqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed3
localPosition
=
convert_mixed4
(
localCoordsPos[index]
)
.
xyz
;
mixed4
dx
=
xdir*invNormXdir
;
mixed4
dz
=
zdir*invNormZdir
;
mixed4
dy
=
cross
(
dz,
dx
)
;
//
The
derivatives
for
this
case
are
very
complicated.
They
were
computed
with
SymPy
then
simplified
by
hand.
//
The
derivatives
for
this
case
are
very
complicated.
They
were
computed
with
SymPy
then
simplified
by
hand.
mixed
t11
=
(
wx.x*ydir.x-wy.x*xdir.x
)
*invNormZdir
;
real4
f
=
force[siteAtomIndex]
;
mixed
t12
=
(
wx.x*ydir.y-wy.x*xdir.y
)
*invNormZdir
;
mixed3
fp1
=
localPosition*f.x
;
mixed
t13
=
(
wx.x*ydir.z-wy.x*xdir.z
)
*invNormZdir
;
mixed3
fp2
=
localPosition*f.y
;
mixed
t21
=
(
wx.y*ydir.x-wy.y*xdir.x
)
*invNormZdir
;
mixed3
fp3
=
localPosition*f.z
;
mixed
t22
=
(
wx.y*ydir.y-wy.y*xdir.y
)
*invNormZdir
;
for
(
int
j
=
start
; j < end; j++) {
mixed
t23
=
(
wx.y*ydir.z-wy.y*xdir.z
)
*invNormZdir
;
real
originWeight
=
localCoordsWeights[3*j]
;
mixed
t31
=
(
wx.z*ydir.x-wy.z*xdir.x
)
*invNormZdir
;
real
wx
=
localCoordsWeights[3*j+1]
;
mixed
t32
=
(
wx.z*ydir.y-wy.z*xdir.y
)
*invNormZdir
;
real
wy
=
localCoordsWeights[3*j+2]
;
mixed
t33
=
(
wx.z*ydir.z-wy.z*xdir.z
)
*invNormZdir
;
mixed
wxScaled
=
wx*invNormXdir
;
mixed
sx1
=
t13*dz.y-t12*dz.z
;
mixed
t1
=
(
wx*ydir.x-wy*xdir.x
)
*invNormZdir
;
mixed
sy1
=
t11*dz.z-t13*dz.x
;
mixed
t2
=
(
wx*ydir.y-wy*xdir.y
)
*invNormZdir
;
mixed
sz1
=
t12*dz.x-t11*dz.y
;
mixed
t3
=
(
wx*ydir.z-wy*xdir.z
)
*invNormZdir
;
mixed
sx2
=
t23*dz.y-t22*dz.z
;
mixed
sx
=
t3*dz.y-t2*dz.z
;
mixed
sy2
=
t21*dz.z-t23*dz.x
;
mixed
sy
=
t1*dz.z-t3*dz.x
;
mixed
sz2
=
t22*dz.x-t21*dz.y
;
mixed
sz
=
t2*dz.x-t1*dz.y
;
mixed
sx3
=
t33*dz.y-t32*dz.z
;
real4
fresult
=
0
;
mixed
sy3
=
t31*dz.z-t33*dz.x
;
fresult.x
+=
fp1.x*wxScaled*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled
+
dy.x*sx
-
dx.y*t2
-
dx.z*t3
)
+
f.x*originWeight
;
mixed
sz3
=
t32*dz.x-t31*dz.y
;
fresult.y
+=
fp1.x*wxScaled*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy+t3
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled
+
dy.x*sy
+
dx.y*t1
)
;
mixed4
wxScaled
=
wx*invNormXdir
;
fresult.z
+=
fp1.x*wxScaled*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz-t2
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled
+
dy.x*sz
+
dx.z*t1
)
;
real4
f
=
force[atoms.x]
;
fresult.x
+=
fp2.x*wxScaled*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx-t3
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled
-
dy.y*sx
-
dx.x*t2
)
;
real4
f1
=
0
;
fresult.y
+=
fp2.x*wxScaled*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled
-
dy.y*sy
+
dx.x*t1
+
dx.z*t3
)
+
f.y*originWeight
;
real4
f2
=
0
;
fresult.z
+=
fp2.x*wxScaled*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz+t1
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled
-
dy.y*sz
-
dx.z*t2
)
;
real4
f3
=
0
;
fresult.x
+=
fp3.x*wxScaled*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx+t2
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled
+
dy.z*sx
+
dx.x*t3
)
;
mixed4
fp1
=
localPosition*f.x
;
fresult.y
+=
fp3.x*wxScaled*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy-t1
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled
+
dy.z*sy
+
dx.y*t3
)
;
mixed4
fp2
=
localPosition*f.y
;
fresult.z
+=
fp3.x*wxScaled*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled
+
dy.z*sz
-
dx.x*t1
-
dx.y*t2
)
+
f.z*originWeight
;
mixed4
fp3
=
localPosition*f.z
;
ADD_FORCE
(
localCoordsAtoms[j],
fresult
)
;
f1.x
+=
fp1.x*wxScaled.x*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx1
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.x
+
dy.x*sx1
-
dx.y*t12
-
dx.z*t13
)
+
f.x*originWeights.x
;
}
f1.y
+=
fp1.x*wxScaled.x*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy1+t13
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.x
+
dy.x*sy1
+
dx.y*t11
)
;
f1.z
+=
fp1.x*wxScaled.x*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz1-t12
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.x
+
dy.x*sz1
+
dx.z*t11
)
;
f2.x
+=
fp1.x*wxScaled.y*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx2
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.y
+
dy.x*sx2
-
dx.y*t22
-
dx.z*t23
)
+
f.x*originWeights.y
;
f2.y
+=
fp1.x*wxScaled.y*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy2+t23
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.y
+
dy.x*sy2
+
dx.y*t21
)
;
f2.z
+=
fp1.x*wxScaled.y*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz2-t22
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.y
+
dy.x*sz2
+
dx.z*t21
)
;
f3.x
+=
fp1.x*wxScaled.z*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx3
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.z
+
dy.x*sx3
-
dx.y*t32
-
dx.z*t33
)
+
f.x*originWeights.z
;
f3.y
+=
fp1.x*wxScaled.z*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy3+t33
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.z
+
dy.x*sy3
+
dx.y*t31
)
;
f3.z
+=
fp1.x*wxScaled.z*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz3-t32
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.z
+
dy.x*sz3
+
dx.z*t31
)
;
f1.x
+=
fp2.x*wxScaled.x*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx1-t13
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.x
-
dy.y*sx1
-
dx.x*t12
)
;
f1.y
+=
fp2.x*wxScaled.x*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy1
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.x
-
dy.y*sy1
+
dx.x*t11
+
dx.z*t13
)
+
f.y*originWeights.x
;
f1.z
+=
fp2.x*wxScaled.x*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz1+t11
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.x
-
dy.y*sz1
-
dx.z*t12
)
;
f2.x
+=
fp2.x*wxScaled.y*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx2-t23
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.y
-
dy.y*sx2
-
dx.x*t22
)
;
f2.y
+=
fp2.x*wxScaled.y*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy2
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.y
-
dy.y*sy2
+
dx.x*t21
+
dx.z*t23
)
+
f.y*originWeights.y
;
f2.z
+=
fp2.x*wxScaled.y*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz2+t21
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.y
-
dy.y*sz2
-
dx.z*t22
)
;
f3.x
+=
fp2.x*wxScaled.z*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx3-t33
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.z
-
dy.y*sx3
-
dx.x*t32
)
;
f3.y
+=
fp2.x*wxScaled.z*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy3
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.z
-
dy.y*sy3
+
dx.x*t31
+
dx.z*t33
)
+
f.y*originWeights.z
;
f3.z
+=
fp2.x*wxScaled.z*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz3+t31
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.z
-
dy.y*sz3
-
dx.z*t32
)
;
f1.x
+=
fp3.x*wxScaled.x*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx1+t12
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.x
+
dy.z*sx1
+
dx.x*t13
)
;
f1.y
+=
fp3.x*wxScaled.x*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy1-t11
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.x
+
dy.z*sy1
+
dx.y*t13
)
;
f1.z
+=
fp3.x*wxScaled.x*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz1
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.x
+
dy.z*sz1
-
dx.x*t11
-
dx.y*t12
)
+
f.z*originWeights.x
;
f2.x
+=
fp3.x*wxScaled.y*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx2+t22
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.y
+
dy.z*sx2
+
dx.x*t23
)
;
f2.y
+=
fp3.x*wxScaled.y*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy2-t21
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.y
+
dy.z*sy2
+
dx.y*t23
)
;
f2.z
+=
fp3.x*wxScaled.y*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz2
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.y
+
dy.z*sz2
-
dx.x*t21
-
dx.y*t22
)
+
f.z*originWeights.y
;
f3.x
+=
fp3.x*wxScaled.z*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx3+t32
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.z
+
dy.z*sx3
+
dx.x*t33
)
;
f3.y
+=
fp3.x*wxScaled.z*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy3-t31
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.z
+
dy.z*sy3
+
dx.y*t33
)
;
f3.z
+=
fp3.x*wxScaled.z*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz3
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.z
+
dy.z*sz3
-
dx.x*t31
-
dx.y*t32
)
+
f.z*originWeights.z
;
ADD_FORCE
(
atoms.y,
f1
)
;
ADD_FORCE
(
atoms.z,
f2
)
;
ADD_FORCE
(
atoms.w,
f3
)
;
}
}
}
}
platforms/opencl/tests/TestOpenCLCustomCVForce.cpp
0 → 100644
View file @
18295108
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestCustomCVForce.h"
// Platform-specific test entry point for this test file.
// No additional tests are defined here, so the body does nothing.
void runPlatformTests()
{
    // Nothing to run.
}
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
18295108
...
@@ -54,7 +54,7 @@ template <class Real2>
...
@@ -54,7 +54,7 @@ template <class Real2>
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
System
system
;
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
,
NULL
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/opencl/tests/TestOpenCLRandom.cpp
View file @
18295108
...
@@ -54,7 +54,7 @@ void testGaussian() {
...
@@ -54,7 +54,7 @@ void testGaussian() {
System
system
;
System
system
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
,
NULL
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
platforms/opencl/tests/TestOpenCLSort.cpp
View file @
18295108
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
...
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System
system
;
System
system
;
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
,
"false"
,
1
,
NULL
);
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
OpenCLContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
OpenCLArray
data
(
context
,
array
.
size
(),
sizeof
(
float
),
"sortData"
);
...
...
platforms/reference/include/ReferenceCustomCVForce.h
0 → 100644
View file @
18295108
/* Portions copyright (c) 2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __ReferenceCustomCVForce_H__
#define __ReferenceCustomCVForce_H__
#include "openmm/CustomCVForce.h"
#include "openmm/internal/ContextImpl.h"
#include "lepton/ExpressionProgram.h"
#include <map>
#include <string>
#include <vector>
namespace OpenMM {

/**
 * Declares the object the reference platform uses to evaluate a CustomCVForce.
 * Only the interface is declared here; the member functions are defined elsewhere.
 */
class ReferenceCustomCVForce {
public:
    /**
     * Construct the evaluator from the CustomCVForce it will compute.
     *
     * @param force    the CustomCVForce to evaluate
     */
    ReferenceCustomCVForce(const OpenMM::CustomCVForce& force);

    /**
     * Destructor.
     */
    ~ReferenceCustomCVForce();

    /**
     * Calculate the interaction.
     *
     * @param innerContext       the context created by the force for evaluating collective variables
     * @param atomCoordinates    atom coordinates
     * @param globalParameters   the values of global parameters
     * @param forces             the forces are added to this
     * @param totalEnergy        the energy is added to this
     * @param energyParamDerivs  parameter derivatives are added to this
     */
    void calculateIxn(ContextImpl& innerContext,
                      std::vector<OpenMM::Vec3>& atomCoordinates,
                      const std::map<std::string, double>& globalParameters,
                      std::vector<OpenMM::Vec3>& forces,
                      double* totalEnergy,
                      std::map<std::string, double>& energyParamDerivs) const;

private:
    // Lepton expression programs and the names that accompany them.
    // NOTE(review): how these are populated is not visible in this header.
    Lepton::ExpressionProgram energyExpression;
    std::vector<std::string> variableNames;
    std::vector<std::string> paramDerivNames;
    std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
    std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
};

} // namespace OpenMM
#endif // __ReferenceCustomCVForce_H__
platforms/reference/include/ReferenceKernels.h
View file @
18295108
...
@@ -45,6 +45,7 @@ class ReferenceObc;
...
@@ -45,6 +45,7 @@ class ReferenceObc;
class
ReferenceAndersenThermostat
;
class
ReferenceAndersenThermostat
;
class
ReferenceCustomCentroidBondIxn
;
class
ReferenceCustomCentroidBondIxn
;
class
ReferenceCustomCompoundBondIxn
;
class
ReferenceCustomCompoundBondIxn
;
class
ReferenceCustomCVForce
;
class
ReferenceCustomHbondIxn
;
class
ReferenceCustomHbondIxn
;
class
ReferenceCustomManyParticleIxn
;
class
ReferenceCustomManyParticleIxn
;
class
ReferenceGayBerneForce
;
class
ReferenceGayBerneForce
;
...
@@ -1006,6 +1007,44 @@ private:
...
@@ -1006,6 +1007,44 @@ private:
ReferenceGayBerneForce
*
ixn
;
ReferenceGayBerneForce
*
ixn
;
};
};
/**
 * Reference-platform kernel invoked by CustomCVForce to calculate the forces acting on the
 * system and the energy of the system.
 */
class ReferenceCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
    /**
     * Create the kernel. The interaction object pointer starts out NULL.
     *
     * @param name      forwarded to the CalcCustomCVForceKernel base constructor
     * @param platform  forwarded to the CalcCustomCVForceKernel base constructor
     */
    ReferenceCalcCustomCVForceKernel(std::string name, const Platform& platform)
        : CalcCustomCVForceKernel(name, platform), ixn(NULL)
    {
    }

    ~ReferenceCalcCustomCVForceKernel();

    /**
     * Initialize the kernel.
     *
     * @param system        the System this kernel will be applied to
     * @param force         the CustomCVForce this kernel will be used for
     * @param innerContext  the context created by the CustomCVForce for computing collective variables
     */
    void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);

    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param innerContext   the context created by the CustomCVForce for computing collective variables
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);

    /**
     * Copy state information to the inner context.
     *
     * @param context       the context in which to execute this kernel
     * @param innerContext  the context created by the CustomCVForce for computing collective variables
     */
    void copyState(ContextImpl& context, ContextImpl& innerContext);

private:
    // Interaction object; NULL until one is assigned.
    ReferenceCustomCVForce* ixn;
    // Names of global parameters and of energy parameter derivatives.
    std::vector<std::string> globalParameterNames;
    std::vector<std::string> energyParamDerivNames;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
*/
*/
...
...
platforms/reference/src/ReferenceKernelFactory.cpp
View file @
18295108
...
@@ -78,6 +78,8 @@ KernelImpl* ReferenceKernelFactory::createKernelImpl(std::string name, const Pla
...
@@ -78,6 +78,8 @@ KernelImpl* ReferenceKernelFactory::createKernelImpl(std::string name, const Pla
return
new
ReferenceCalcCustomCentroidBondForceKernel
(
name
,
platform
);
return
new
ReferenceCalcCustomCentroidBondForceKernel
(
name
,
platform
);
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
ReferenceCalcCustomCompoundBondForceKernel
(
name
,
platform
);
return
new
ReferenceCalcCustomCompoundBondForceKernel
(
name
,
platform
);
if
(
name
==
CalcCustomCVForceKernel
::
Name
())
return
new
ReferenceCalcCustomCVForceKernel
(
name
,
platform
);
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
ReferenceCalcCustomManyParticleForceKernel
(
name
,
platform
);
return
new
ReferenceCalcCustomManyParticleForceKernel
(
name
,
platform
);
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
...
...
platforms/reference/src/ReferenceKernels.cpp
View file @
18295108
...
@@ -42,6 +42,7 @@
...
@@ -42,6 +42,7 @@
#include "ReferenceCustomBondIxn.h"
#include "ReferenceCustomBondIxn.h"
#include "ReferenceCustomCentroidBondIxn.h"
#include "ReferenceCustomCentroidBondIxn.h"
#include "ReferenceCustomCompoundBondIxn.h"
#include "ReferenceCustomCompoundBondIxn.h"
#include "ReferenceCustomCVForce.h"
#include "ReferenceCustomDynamics.h"
#include "ReferenceCustomDynamics.h"
#include "ReferenceCustomExternalIxn.h"
#include "ReferenceCustomExternalIxn.h"
#include "ReferenceCustomGBIxn.h"
#include "ReferenceCustomGBIxn.h"
...
@@ -1473,7 +1474,8 @@ double ReferenceCalcCustomGBForceKernel::execute(ContextImpl& context, bool incl
...
@@ -1473,7 +1474,8 @@ double ReferenceCalcCustomGBForceKernel::execute(ContextImpl& context, bool incl
if
(
periodic
)
if
(
periodic
)
ixn
.
setPeriodic
(
extractBoxVectors
(
context
));
ixn
.
setPeriodic
(
extractBoxVectors
(
context
));
if
(
nonbondedMethod
!=
NoCutoff
)
{
if
(
nonbondedMethod
!=
NoCutoff
)
{
computeNeighborListVoxelHash
(
*
neighborList
,
numParticles
,
posData
,
exclusions
,
extractBoxVectors
(
context
),
periodic
,
nonbondedCutoff
,
0.0
);
vector
<
set
<
int
>
>
empty
(
context
.
getSystem
().
getNumParticles
());
// Don't omit exclusions from the neighbor list
computeNeighborListVoxelHash
(
*
neighborList
,
numParticles
,
posData
,
empty
,
extractBoxVectors
(
context
),
periodic
,
nonbondedCutoff
,
0.0
);
ixn
.
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
);
ixn
.
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
);
}
}
map
<
string
,
double
>
globalParameters
;
map
<
string
,
double
>
globalParameters
;
...
@@ -2015,6 +2017,44 @@ void ReferenceCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -2015,6 +2017,44 @@ void ReferenceCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& cont
ixn
=
new
ReferenceGayBerneForce
(
force
);
ixn
=
new
ReferenceGayBerneForce
(
force
);
}
}
// Free the interaction object held in ixn.
ReferenceCalcCustomCVForceKernel::~ReferenceCalcCustomCVForceKernel() {
    // Deleting a null pointer is a no-op, so no explicit NULL test is required.
    delete ixn;
}
// Record the names of the force's global parameters and energy parameter derivatives,
// then allocate the ReferenceCustomCVForce that will evaluate it.
// The system and innerContext arguments are not used in this function.
void ReferenceCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
    const int globalCount = force.getNumGlobalParameters();
    for (int paramIndex = 0; paramIndex < globalCount; ++paramIndex)
        globalParameterNames.push_back(force.getGlobalParameterName(paramIndex));

    const int derivCount = force.getNumEnergyParameterDerivatives();
    for (int derivIndex = 0; derivIndex < derivCount; ++derivIndex)
        energyParamDerivNames.push_back(force.getEnergyParameterDerivativeName(derivIndex));

    ixn = new ReferenceCustomCVForce(force);
}
// Evaluate the force through ixn->calculateIxn() and return the energy it reports.
// Returns 0 when includeEnergy is false, because calculateIxn is then given a NULL energy pointer.
// includeForces is not consulted here; the force array is always passed along.
double ReferenceCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
    // Copy the outer context's state into the inner context before evaluating.
    copyState(context, innerContext);

    vector<Vec3>& positions = extractPositions(context);
    vector<Vec3>& forces = extractForces(context);

    // Look up the current value of every global parameter recorded by initialize().
    map<string, double> parameterValues;
    for (const string& parameterName : globalParameterNames)
        parameterValues[parameterName] = context.getParameter(parameterName);

    map<string, double>& parameterDerivs = extractEnergyParameterDerivatives(context);

    // Hand calculateIxn an energy slot only when the caller asked for the energy.
    double energy = 0;
    double* energySlot = NULL;
    if (includeEnergy)
        energySlot = &energy;

    ixn->calculateIxn(innerContext, positions, parameterValues, forces, energySlot, parameterDerivs);
    return energy;
}
// Copy positions, velocities, periodic box vectors, time and parameter values
// from the outer context into the inner context.
void ReferenceCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
    // Per-particle state.
    extractPositions(innerContext) = extractPositions(context);
    extractVelocities(innerContext) = extractVelocities(context);

    // Periodic box vectors.
    Vec3 boxA, boxB, boxC;
    context.getPeriodicBoxVectors(boxA, boxB, boxC);
    innerContext.setPeriodicBoxVectors(boxA, boxB, boxC);

    // Simulation time.
    innerContext.setTime(context.getTime());

    // Overwrite every parameter the inner context holds with the outer context's value.
    // The loop walks a copy of the inner parameter map (presumably so it does not
    // iterate the inner context's own storage while setParameter() updates it).
    const map<string, double> innerParams = innerContext.getParameters();
    for (map<string, double>::const_iterator entry = innerParams.begin(); entry != innerParams.end(); ++entry)
        innerContext.setParameter(entry->first, context.getParameter(entry->first));
}
ReferenceIntegrateVerletStepKernel
::~
ReferenceIntegrateVerletStepKernel
()
{
ReferenceIntegrateVerletStepKernel
::~
ReferenceIntegrateVerletStepKernel
()
{
if
(
dynamics
)
if
(
dynamics
)
delete
dynamics
;
delete
dynamics
;
...
...
platforms/reference/src/ReferencePlatform.cpp
View file @
18295108
...
@@ -64,6 +64,7 @@ ReferencePlatform::ReferencePlatform() {
...
@@ -64,6 +64,7 @@ ReferencePlatform::ReferencePlatform() {
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCVForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment