Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
80c223a8
Commit
80c223a8
authored
Jul 17, 2017
by
peastman
Browse files
OpenCL implementation of LocalCoordinatesSite depending on arbitrary particles
parent
26809372
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
140 additions
and
137 deletions
+140
-137
platforms/opencl/include/OpenCLIntegrationUtilities.h
platforms/opencl/include/OpenCLIntegrationUtilities.h
+5
-2
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+63
-35
platforms/opencl/src/kernels/virtualSites.cl
platforms/opencl/src/kernels/virtualSites.cl
+72
-100
No files found.
platforms/opencl/include/OpenCLIntegrationUtilities.h
View file @
80c223a8
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -156,8 +156,11 @@ private:
OpenCLArray
*
vsite3AvgWeights
;
OpenCLArray
*
vsiteOutOfPlaneAtoms
;
OpenCLArray
*
vsiteOutOfPlaneWeights
;
OpenCLArray
*
vsiteLocalCoordsIndex
;
OpenCLArray
*
vsiteLocalCoordsAtoms
;
OpenCLArray
*
vsiteLocalCoordsParams
;
OpenCLArray
*
vsiteLocalCoordsWeights
;
OpenCLArray
*
vsiteLocalCoordsPos
;
OpenCLArray
*
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
lastSeed
,
numVsites
;
bool
hasInitializedPosConstraintKernels
,
hasInitializedVelConstraintKernels
,
ccmaUseDirectBuffer
,
hasOverlappingVsites
;
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
80c223a8
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -102,7 +102,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedHostBuffer
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsParams
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
),
hasInitializedPosConstraintKernels
(
false
),
hasInitializedVelConstraintKernels
(
false
),
hasOverlappingVsites
(
false
)
{
// Create workspace arrays.
...
...
@@ -497,8 +498,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vector
<
mm_double4
>
vsite3AvgWeightVec
;
vector
<
mm_int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
mm_double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
mm_int4
>
vsiteLocalCoordsAtomVec
;
vector
<
cl_double
>
vsiteLocalCoordsParamVec
;
vector
<
cl_int
>
vsiteLocalCoordsIndexVec
;
vector
<
cl_int
>
vsiteLocalCoordsAtomVec
;
vector
<
cl_int
>
vsiteLocalCoordsStartVec
;
vector
<
cl_double
>
vsiteLocalCoordsWeightVec
;
vector
<
mm_double4
>
vsiteLocalCoordsPosVec
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
...
...
@@ -526,62 +530,70 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// An out of plane site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
vsiteLocalCoordsAtomVec
.
push_back
(
mm_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
Vec3
origin
=
site
.
getOriginWeights
();
Vec3
x
=
site
.
getXWeights
();
Vec3
y
=
site
.
getYWeights
();
int
numParticles
=
site
.
getNumParticles
();
vector
<
double
>
origin
,
x
,
y
;
site
.
getOriginWeights
(
origin
);
site
.
getXWeights
(
x
);
site
.
getYWeights
(
y
);
vsiteLocalCoordsIndexVec
.
push_back
(
i
);
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
vsiteLocalCoordsAtomVec
.
push_back
(
site
.
getParticle
(
j
));
vsiteLocalCoordsWeightVec
.
push_back
(
origin
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
x
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
y
[
j
]);
}
Vec3
pos
=
site
.
getLocalPosition
();
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
2
]);
vsiteLocalCoordsPosVec
.
push_back
(
mm_double4
(
pos
[
0
],
pos
[
1
],
pos
[
2
],
0.0
));
}
}
}
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Atom
Vec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Pos
Vec
.
size
();
numVsites
=
num2Avg
+
num3Avg
+
numOutOfPlane
+
numLocalCoords
;
vsite2AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
max
(
1
,
numLocalCoords
),
"vsiteLocalCoordinatesAtoms"
);
vsiteLocalCoordsIndex
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
create
<
cl_double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
create
<
mm_double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
vsiteLocalCoordsParams
->
upload
(
vsiteLocalCoordsParamVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
}
}
else
{
vsite2AvgWeights
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
OpenCLArray
::
create
<
float
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
OpenCLArray
::
create
<
cl_float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
OpenCLArray
::
create
<
mm_float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
vector
<
mm_float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
...
...
@@ -601,10 +613,14 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
}
if
(
numLocalCoords
>
0
)
{
vector
<
cl_float
>
floatParams
(
vsiteLocalCoordsParamVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsParamVec
.
size
();
i
++
)
floatParams
[
i
]
=
(
cl_float
)
vsiteLocalCoordsParamVec
[
i
];
vsiteLocalCoordsParams
->
upload
(
floatParams
);
vector
<
cl_float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatWeights
[
i
]
=
(
cl_float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vector
<
mm_float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
mm_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
}
}
...
...
@@ -645,8 +661,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsParams
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsitePositionKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
vsiteForceKernel
=
cl
::
Kernel
(
vsiteProgram
,
"distributeForces"
);
index
=
0
;
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
@@ -661,8 +680,11 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsite3AvgWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteOutOfPlaneWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsIndex
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsAtoms
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsParams
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsWeights
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsPos
->
getDeviceBuffer
());
vsiteForceKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
vsiteLocalCoordsStartIndex
->
getDeviceBuffer
());
if
(
hasOverlappingVsites
&&
context
.
getSupports64BitGlobalAtomics
())
vsiteAddForcesKernel
=
cl
::
Kernel
(
vsiteProgram
,
"addDistributedForces"
);
}
...
...
@@ -718,10 +740,16 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsParams
!=
NULL
)
delete
vsiteLocalCoordsParams
;
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
void
OpenCLIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
...
...
platforms/opencl/src/kernels/virtualSites.cl
View file @
80c223a8
...
...
@@ -33,7 +33,9 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsParams
)
{
__global
const
int*
restrict
localCoordsIndex,
__global
const
int*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsWeights,
__global
const
real4*
restrict
localCoordsPos,
__global
const
int*
restrict
localCoordsStartIndex
)
{
#
ifndef
USE_MIXED_PRECISION
__global
real4*
posqCorrection
=
0
;
#
endif
...
...
@@ -81,30 +83,30 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
//
Local
coordinates
sites.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4
atoms
=
localCoordsAtoms[index]
;
__global
const
real*
params
=
&localCoordsParams[12*index]
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
atoms.x
)
;
mixed4
pos1_4
=
loadPos
(
posq,
posqCorrection,
atoms.y
)
;
mixed4
pos2_4
=
loadPos
(
posq,
posqCorrection,
atoms.z
)
;
mixed4
pos3_4
=
loadPos
(
posq,
posqCorrection,
atoms.w
)
;
mixed4
pos1
=
(
mixed4
)
(
pos1_4.x,
pos1_4.y,
pos1_4.z,
0
)
;
mixed4
pos2
=
(
mixed4
)
(
pos2_4.x,
pos2_4.y,
pos2_4.z,
0
)
;
mixed4
pos3
=
(
mixed4
)
(
pos3_4.x,
pos3_4.y,
pos3_4.z,
0
)
;
mixed4
originWeights
=
(
mixed4
)
(
params[0],
params[1],
params[2],
0
)
;
mixed4
xWeights
=
(
mixed4
)
(
params[3],
params[4],
params[5],
0
)
;
mixed4
yWeights
=
(
mixed4
)
(
params[6],
params[7],
params[8],
0
)
;
mixed4
localPosition
=
(
mixed4
)
(
params[9],
params[10],
params[11],
0
)
;
mixed4
origin
=
pos1*originWeights.x
+
pos2*originWeights.y
+
pos3*originWeights.z
;
mixed4
xdir
=
pos1*xWeights.x
+
pos2*xWeights.y
+
pos3*xWeights.z
;
mixed4
ydir
=
pos1*yWeights.x
+
pos2*yWeights.y
+
pos3*yWeights.z
;
mixed4
zdir
=
cross
(
xdir,
ydir
)
;
xdir
*=
rsqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
zdir
*=
rsqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
int
siteAtomIndex
=
localCoordsIndex[index]
;
int
start
=
localCoordsStartIndex[index]
;
int
end
=
localCoordsStartIndex[index+1]
;
mixed3
origin
=
0
,
xdir
=
0
,
ydir
=
0
;
for
(
int
j
=
start
; j < end; j++) {
mixed3
pos
=
loadPos
(
posq,
posqCorrection,
localCoordsAtoms[j]
)
.
xyz
;
origin
+=
pos*localCoordsWeights[3*j]
;
xdir
+=
pos*localCoordsWeights[3*j+1]
;
ydir
+=
pos*localCoordsWeights[3*j+2]
;
}
mixed3
zdir
=
cross
(
xdir,
ydir
)
;
mixed
normXdir
=
sqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed
normZdir
=
sqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed
invNormXdir
=
(
normXdir
>
0
?
1/normXdir
:
0
)
;
mixed
invNormZdir
=
(
normZdir
>
0
?
1/normZdir
:
0
)
;
xdir
*=
invNormXdir
;
zdir
*=
invNormZdir
;
ydir
=
cross
(
zdir,
xdir
)
;
mixed3
localPosition
=
localCoordsPos[index].xyz
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
siteAtomIndex
)
;
pos.x
=
origin.x
+
xdir.x*localPosition.x
+
ydir.x*localPosition.y
+
zdir.x*localPosition.z
;
pos.y
=
origin.y
+
xdir.y*localPosition.x
+
ydir.y*localPosition.y
+
zdir.y*localPosition.z
;
pos.z
=
origin.z
+
xdir.z*localPosition.x
+
ydir.z*localPosition.y
+
zdir.z*localPosition.z
;
storePos
(
posq,
posqCorrection,
atoms.
x,
pos
)
;
storePos
(
posq,
posqCorrection,
siteAtomInde
x,
pos
)
;
}
}
...
...
@@ -174,7 +176,9 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
__global
const
int4*
restrict
avg2Atoms,
__global
const
real2*
restrict
avg2Weights,
__global
const
int4*
restrict
avg3Atoms,
__global
const
real4*
restrict
avg3Weights,
__global
const
int4*
restrict
outOfPlaneAtoms,
__global
const
real4*
restrict
outOfPlaneWeights,
__global
const
int4*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsParams
)
{
__global
const
int*
restrict
localCoordsIndex,
__global
const
int*
restrict
localCoordsAtoms,
__global
const
real*
restrict
localCoordsWeights,
__global
const
real4*
restrict
localCoordsPos,
__global
const
int*
restrict
localCoordsStartIndex
)
{
#
ifndef
USE_MIXED_PRECISION
__global
real4*
posqCorrection
=
0
;
#
endif
...
...
@@ -225,86 +229,54 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
//
Local
coordinates
sites.
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
int4
atoms
=
localCoordsAtoms[index]
;
__global
const
real*
params
=
&localCoordsParams[12*index]
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
atoms.x
)
;
mixed4
pos1_4
=
loadPos
(
posq,
posqCorrection,
atoms.y
)
;
mixed4
pos2_4
=
loadPos
(
posq,
posqCorrection,
atoms.z
)
;
mixed4
pos3_4
=
loadPos
(
posq,
posqCorrection,
atoms.w
)
;
mixed4
pos1
=
(
mixed4
)
(
pos1_4.x,
pos1_4.y,
pos1_4.z,
0
)
;
mixed4
pos2
=
(
mixed4
)
(
pos2_4.x,
pos2_4.y,
pos2_4.z,
0
)
;
mixed4
pos3
=
(
mixed4
)
(
pos3_4.x,
pos3_4.y,
pos3_4.z,
0
)
;
mixed4
originWeights
=
(
mixed4
)
(
params[0],
params[1],
params[2],
0
)
;
mixed4
wx
=
(
mixed4
)
(
params[3],
params[4],
params[5],
0
)
;
mixed4
wy
=
(
mixed4
)
(
params[6],
params[7],
params[8],
0
)
;
mixed4
localPosition
=
(
mixed4
)
(
params[9],
params[10],
params[11],
0
)
;
mixed4
origin
=
pos1*originWeights.x
+
pos2*originWeights.y
+
pos3*originWeights.z
;
mixed4
xdir
=
pos1*wx.x
+
pos2*wx.y
+
pos3*wx.z
;
mixed4
ydir
=
pos1*wy.x
+
pos2*wy.y
+
pos3*wy.z
;
mixed4
zdir
=
cross
(
xdir,
ydir
)
;
mixed
invNormXdir
=
rsqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed
invNormZdir
=
rsqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed4
dx
=
xdir*invNormXdir
;
mixed4
dz
=
zdir*invNormZdir
;
mixed4
dy
=
cross
(
dz,
dx
)
;
int
siteAtomIndex
=
localCoordsIndex[index]
;
int
start
=
localCoordsStartIndex[index]
;
int
end
=
localCoordsStartIndex[index+1]
;
mixed3
origin
=
0
,
xdir
=
0
,
ydir
=
0
;
for
(
int
j
=
start
; j < end; j++) {
mixed3
pos
=
loadPos
(
posq,
posqCorrection,
localCoordsAtoms[j]
)
.
xyz
;
origin
+=
pos*localCoordsWeights[3*j]
;
xdir
+=
pos*localCoordsWeights[3*j+1]
;
ydir
+=
pos*localCoordsWeights[3*j+2]
;
}
mixed3
zdir
=
cross
(
xdir,
ydir
)
;
mixed
normXdir
=
sqrt
(
xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z
)
;
mixed
normZdir
=
sqrt
(
zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z
)
;
mixed
invNormXdir
=
(
normXdir
>
0
?
1/normXdir
:
0
)
;
mixed
invNormZdir
=
(
normZdir
>
0
?
1/normZdir
:
0
)
;
mixed3
dx
=
xdir*invNormXdir
;
mixed3
dz
=
zdir*invNormZdir
;
mixed3
dy
=
cross
(
dz,
dx
)
;
mixed3
localPosition
=
localCoordsPos[index].xyz
;
//
The
derivatives
for
this
case
are
very
complicated.
They
were
computed
with
SymPy
then
simplified
by
hand.
mixed
t11
=
(
wx.x*ydir.x-wy.x*xdir.x
)
*invNormZdir
;
mixed
t12
=
(
wx.x*ydir.y-wy.x*xdir.y
)
*invNormZdir
;
mixed
t13
=
(
wx.x*ydir.z-wy.x*xdir.z
)
*invNormZdir
;
mixed
t21
=
(
wx.y*ydir.x-wy.y*xdir.x
)
*invNormZdir
;
mixed
t22
=
(
wx.y*ydir.y-wy.y*xdir.y
)
*invNormZdir
;
mixed
t23
=
(
wx.y*ydir.z-wy.y*xdir.z
)
*invNormZdir
;
mixed
t31
=
(
wx.z*ydir.x-wy.z*xdir.x
)
*invNormZdir
;
mixed
t32
=
(
wx.z*ydir.y-wy.z*xdir.y
)
*invNormZdir
;
mixed
t33
=
(
wx.z*ydir.z-wy.z*xdir.z
)
*invNormZdir
;
mixed
sx1
=
t13*dz.y-t12*dz.z
;
mixed
sy1
=
t11*dz.z-t13*dz.x
;
mixed
sz1
=
t12*dz.x-t11*dz.y
;
mixed
sx2
=
t23*dz.y-t22*dz.z
;
mixed
sy2
=
t21*dz.z-t23*dz.x
;
mixed
sz2
=
t22*dz.x-t21*dz.y
;
mixed
sx3
=
t33*dz.y-t32*dz.z
;
mixed
sy3
=
t31*dz.z-t33*dz.x
;
mixed
sz3
=
t32*dz.x-t31*dz.y
;
mixed4
wxScaled
=
wx*invNormXdir
;
real4
f
=
force[atoms.x]
;
real4
f1
=
0
;
real4
f2
=
0
;
real4
f3
=
0
;
mixed4
fp1
=
localPosition*f.x
;
mixed4
fp2
=
localPosition*f.y
;
mixed4
fp3
=
localPosition*f.z
;
f1.x
+=
fp1.x*wxScaled.x*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx1
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.x
+
dy.x*sx1
-
dx.y*t12
-
dx.z*t13
)
+
f.x*originWeights.x
;
f1.y
+=
fp1.x*wxScaled.x*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy1+t13
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.x
+
dy.x*sy1
+
dx.y*t11
)
;
f1.z
+=
fp1.x*wxScaled.x*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz1-t12
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.x
+
dy.x*sz1
+
dx.z*t11
)
;
f2.x
+=
fp1.x*wxScaled.y*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx2
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.y
+
dy.x*sx2
-
dx.y*t22
-
dx.z*t23
)
+
f.x*originWeights.y
;
f2.y
+=
fp1.x*wxScaled.y*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy2+t23
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.y
+
dy.x*sy2
+
dx.y*t21
)
;
f2.z
+=
fp1.x*wxScaled.y*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz2-t22
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.y
+
dy.x*sz2
+
dx.z*t21
)
;
f3.x
+=
fp1.x*wxScaled.z*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx3
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled.z
+
dy.x*sx3
-
dx.y*t32
-
dx.z*t33
)
+
f.x*originWeights.z
;
f3.y
+=
fp1.x*wxScaled.z*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy3+t33
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled.z
+
dy.x*sy3
+
dx.y*t31
)
;
f3.z
+=
fp1.x*wxScaled.z*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz3-t32
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled.z
+
dy.x*sz3
+
dx.z*t31
)
;
f1.x
+=
fp2.x*wxScaled.x*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx1-t13
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.x
-
dy.y*sx1
-
dx.x*t12
)
;
f1.y
+=
fp2.x*wxScaled.x*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy1
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.x
-
dy.y*sy1
+
dx.x*t11
+
dx.z*t13
)
+
f.y*originWeights.x
;
f1.z
+=
fp2.x*wxScaled.x*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz1+t11
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.x
-
dy.y*sz1
-
dx.z*t12
)
;
f2.x
+=
fp2.x*wxScaled.y*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx2-t23
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.y
-
dy.y*sx2
-
dx.x*t22
)
;
f2.y
+=
fp2.x*wxScaled.y*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy2
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.y
-
dy.y*sy2
+
dx.x*t21
+
dx.z*t23
)
+
f.y*originWeights.y
;
f2.z
+=
fp2.x*wxScaled.y*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz2+t21
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.y
-
dy.y*sz2
-
dx.z*t22
)
;
f3.x
+=
fp2.x*wxScaled.z*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx3-t33
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled.z
-
dy.y*sx3
-
dx.x*t32
)
;
f3.y
+=
fp2.x*wxScaled.z*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy3
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled.z
-
dy.y*sy3
+
dx.x*t31
+
dx.z*t33
)
+
f.y*originWeights.z
;
f3.z
+=
fp2.x*wxScaled.z*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz3+t31
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled.z
-
dy.y*sz3
-
dx.z*t32
)
;
f1.x
+=
fp3.x*wxScaled.x*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx1+t12
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.x
+
dy.z*sx1
+
dx.x*t13
)
;
f1.y
+=
fp3.x*wxScaled.x*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy1-t11
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.x
+
dy.z*sy1
+
dx.y*t13
)
;
f1.z
+=
fp3.x*wxScaled.x*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz1
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.x
+
dy.z*sz1
-
dx.x*t11
-
dx.y*t12
)
+
f.z*originWeights.x
;
f2.x
+=
fp3.x*wxScaled.y*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx2+t22
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.y
+
dy.z*sx2
+
dx.x*t23
)
;
f2.y
+=
fp3.x*wxScaled.y*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy2-t21
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.y
+
dy.z*sy2
+
dx.y*t23
)
;
f2.z
+=
fp3.x*wxScaled.y*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz2
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.y
+
dy.z*sz2
-
dx.x*t21
-
dx.y*t22
)
+
f.z*originWeights.y
;
f3.x
+=
fp3.x*wxScaled.z*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx3+t32
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled.z
+
dy.z*sx3
+
dx.x*t33
)
;
f3.y
+=
fp3.x*wxScaled.z*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy3-t31
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled.z
+
dy.z*sy3
+
dx.y*t33
)
;
f3.z
+=
fp3.x*wxScaled.z*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz3
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled.z
+
dy.z*sz3
-
dx.x*t31
-
dx.y*t32
)
+
f.z*originWeights.z
;
ADD_FORCE
(
atoms.y,
f1
)
;
ADD_FORCE
(
atoms.z,
f2
)
;
ADD_FORCE
(
atoms.w,
f3
)
;
real4
f
=
force[siteAtomIndex]
;
mixed3
fp1
=
localPosition*f.x
;
mixed3
fp2
=
localPosition*f.y
;
mixed3
fp3
=
localPosition*f.z
;
for
(
int
j
=
start
; j < end; j++) {
real
originWeight
=
localCoordsWeights[3*j]
;
real
wx
=
localCoordsWeights[3*j+1]
;
real
wy
=
localCoordsWeights[3*j+2]
;
mixed
wxScaled
=
wx*invNormXdir
;
mixed
t1
=
(
wx*ydir.x-wy*xdir.x
)
*invNormZdir
;
mixed
t2
=
(
wx*ydir.y-wy*xdir.y
)
*invNormZdir
;
mixed
t3
=
(
wx*ydir.z-wy*xdir.z
)
*invNormZdir
;
mixed
sx
=
t3*dz.y-t2*dz.z
;
mixed
sy
=
t1*dz.z-t3*dz.x
;
mixed
sz
=
t2*dz.x-t1*dz.y
;
mixed4
fresult
=
0
;
fresult.x
+=
fp1.x*wxScaled*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled
+
dy.x*sx
-
dx.y*t2
-
dx.z*t3
)
+
f.x*originWeight
;
fresult.y
+=
fp1.x*wxScaled*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy+t3
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled
+
dy.x*sy
+
dx.y*t1
)
;
fresult.z
+=
fp1.x*wxScaled*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz-t2
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled
+
dy.x*sz
+
dx.z*t1
)
;
fresult.x
+=
fp2.x*wxScaled*
(
-dx.y*dx.x
)
+
fp2.z*
(
dz.y*sx-t3
)
-
fp2.y*
((
dx.x*dy.y-dz.z
)
*wxScaled
-
dy.y*sx
-
dx.x*t2
)
;
fresult.y
+=
fp2.x*wxScaled*
(
1-dx.y*dx.y
)
+
fp2.z*
(
dz.y*sy
)
-
fp2.y*
((
dx.y*dy.y
)
*wxScaled
-
dy.y*sy
+
dx.x*t1
+
dx.z*t3
)
+
f.y*originWeight
;
fresult.z
+=
fp2.x*wxScaled*
(
-dx.y*dx.z
)
+
fp2.z*
(
dz.y*sz+t1
)
-
fp2.y*
((
dx.z*dy.y+dz.x
)
*wxScaled
-
dy.y*sz
-
dx.z*t2
)
;
fresult.x
+=
fp3.x*wxScaled*
(
-dx.z*dx.x
)
+
fp3.z*
(
dz.z*sx+t2
)
+
fp3.y*
((
-dx.x*dy.z-dz.y
)
*wxScaled
+
dy.z*sx
+
dx.x*t3
)
;
fresult.y
+=
fp3.x*wxScaled*
(
-dx.z*dx.y
)
+
fp3.z*
(
dz.z*sy-t1
)
+
fp3.y*
((
-dx.y*dy.z+dz.x
)
*wxScaled
+
dy.z*sy
+
dx.y*t3
)
;
fresult.z
+=
fp3.x*wxScaled*
(
1-dx.z*dx.z
)
+
fp3.z*
(
dz.z*sz
)
+
fp3.y*
((
-dx.z*dy.z
)
*wxScaled
+
dy.z*sz
-
dx.x*t1
-
dx.y*t2
)
+
f.z*originWeight
;
ADD_FORCE
(
localCoordsAtoms[j],
fresult
)
;
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment