Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b2222de3
Commit
b2222de3
authored
Jul 18, 2017
by
Peter Eastman
Browse files
CUDA implementation of LocalCoordinatesSite depending on arbitrary particles
parent
80c223a8
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
138 additions
and
135 deletions
+138
-135
platforms/cuda/include/CudaIntegrationUtilities.h
platforms/cuda/include/CudaIntegrationUtilities.h
+5
-2
platforms/cuda/src/CudaIntegrationUtilities.cpp
platforms/cuda/src/CudaIntegrationUtilities.cpp
+62
-36
platforms/cuda/src/kernels/integrationUtilities.cu
platforms/cuda/src/kernels/integrationUtilities.cu
+67
-93
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+1
-1
platforms/opencl/src/kernels/virtualSites.cl
platforms/opencl/src/kernels/virtualSites.cl
+3
-3
No files found.
platforms/cuda/include/CudaIntegrationUtilities.h
View file @
b2222de3
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -158,8 +158,11 @@ private:
CudaArray
*
vsite3AvgWeights
;
CudaArray
*
vsiteOutOfPlaneAtoms
;
CudaArray
*
vsiteOutOfPlaneWeights
;
CudaArray
*
vsiteLocalCoordsIndex
;
CudaArray
*
vsiteLocalCoordsAtoms
;
CudaArray
*
vsiteLocalCoordsParams
;
CudaArray
*
vsiteLocalCoordsWeights
;
CudaArray
*
vsiteLocalCoordsPos
;
CudaArray
*
vsiteLocalCoordsStartIndex
;
int
randomPos
;
int
lastSeed
,
numVsites
;
double2
lastStepSize
;
...
...
platforms/cuda/src/CudaIntegrationUtilities.cpp
View file @
b2222de3
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
7
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -103,7 +103,8 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
ccmaReducedMass
(
NULL
),
ccmaAtomConstraints
(
NULL
),
ccmaNumAtomConstraints
(
NULL
),
ccmaConstraintMatrixColumn
(
NULL
),
ccmaConstraintMatrixValue
(
NULL
),
ccmaDelta1
(
NULL
),
ccmaDelta2
(
NULL
),
ccmaConverged
(
NULL
),
ccmaConvergedMemory
(
NULL
),
vsite2AvgAtoms
(
NULL
),
vsite2AvgWeights
(
NULL
),
vsite3AvgAtoms
(
NULL
),
vsite3AvgWeights
(
NULL
),
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsParams
(
NULL
)
{
vsiteOutOfPlaneAtoms
(
NULL
),
vsiteOutOfPlaneWeights
(
NULL
),
vsiteLocalCoordsIndex
(
NULL
),
vsiteLocalCoordsAtoms
(
NULL
),
vsiteLocalCoordsWeights
(
NULL
),
vsiteLocalCoordsPos
(
NULL
),
vsiteLocalCoordsStartIndex
(
NULL
)
{
// Create workspace arrays.
lastStepSize
=
make_double2
(
0.0
,
0.0
);
...
...
@@ -454,8 +455,11 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vector
<
double4
>
vsite3AvgWeightVec
;
vector
<
int4
>
vsiteOutOfPlaneAtomVec
;
vector
<
double4
>
vsiteOutOfPlaneWeightVec
;
vector
<
int4
>
vsiteLocalCoordsAtomVec
;
vector
<
double
>
vsiteLocalCoordsParamVec
;
vector
<
int
>
vsiteLocalCoordsIndexVec
;
vector
<
int
>
vsiteLocalCoordsAtomVec
;
vector
<
int
>
vsiteLocalCoordsStartVec
;
vector
<
double
>
vsiteLocalCoordsWeightVec
;
vector
<
double4
>
vsiteLocalCoordsPosVec
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
dynamic_cast
<
const
TwoParticleAverageSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
...
...
@@ -480,64 +484,72 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeightVec
.
push_back
(
make_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
}
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A
n out of plane
site.
// A
local coordinates
site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
vsiteLocalCoordsAtomVec
.
push_back
(
make_int4
(
i
,
site
.
getParticle
(
0
),
site
.
getParticle
(
1
),
site
.
getParticle
(
2
)));
Vec3
origin
=
site
.
getOriginWeights
();
Vec3
x
=
site
.
getXWeights
();
Vec3
y
=
site
.
getYWeights
();
int
numParticles
=
site
.
getNumParticles
();
vector
<
double
>
origin
,
x
,
y
;
site
.
getOriginWeights
(
origin
);
site
.
getXWeights
(
x
);
site
.
getYWeights
(
y
);
vsiteLocalCoordsIndexVec
.
push_back
(
i
);
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
vsiteLocalCoordsAtomVec
.
push_back
(
site
.
getParticle
(
j
));
vsiteLocalCoordsWeightVec
.
push_back
(
origin
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
x
[
j
]);
vsiteLocalCoordsWeightVec
.
push_back
(
y
[
j
]);
}
Vec3
pos
=
site
.
getLocalPosition
();
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
origin
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
x
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
y
[
2
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
0
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
1
]);
vsiteLocalCoordsParamVec
.
push_back
(
pos
[
2
]);
vsiteLocalCoordsPosVec
.
push_back
(
make_double4
(
pos
[
0
],
pos
[
1
],
pos
[
2
],
0.0
));
}
}
}
vsiteLocalCoordsStartVec
.
push_back
(
vsiteLocalCoordsAtomVec
.
size
());
int
num2Avg
=
vsite2AvgAtomVec
.
size
();
int
num3Avg
=
vsite3AvgAtomVec
.
size
();
int
numOutOfPlane
=
vsiteOutOfPlaneAtomVec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Atom
Vec
.
size
();
int
numLocalCoords
=
vsiteLocalCoords
Pos
Vec
.
size
();
vsite2AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgAtoms"
);
vsite3AvgAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgAtoms"
);
vsiteOutOfPlaneAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneAtoms"
);
vsiteLocalCoordsAtoms
=
CudaArray
::
create
<
int4
>
(
context
,
max
(
1
,
numLocalCoords
),
"vsiteLocalCoordinatesAtoms"
);
vsiteLocalCoordsIndex
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsIndexVec
.
size
()),
"vsiteLocalCoordsIndex"
);
vsiteLocalCoordsAtoms
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsAtomVec
.
size
()),
"vsiteLocalCoordsAtoms"
);
vsiteLocalCoordsStartIndex
=
CudaArray
::
create
<
int
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsStartVec
.
size
()),
"vsiteLocalCoordsStartIndex"
);
if
(
num2Avg
>
0
)
vsite2AvgAtoms
->
upload
(
vsite2AvgAtomVec
);
if
(
num3Avg
>
0
)
vsite3AvgAtoms
->
upload
(
vsite3AvgAtomVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneAtoms
->
upload
(
vsiteOutOfPlaneAtomVec
);
if
(
numLocalCoords
>
0
)
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsIndex
->
upload
(
vsiteLocalCoordsIndexVec
);
vsiteLocalCoordsAtoms
->
upload
(
vsiteLocalCoordsAtomVec
);
vsiteLocalCoordsStartIndex
->
upload
(
vsiteLocalCoordsStartVec
);
}
if
(
context
.
getUseDoublePrecision
())
{
vsite2AvgWeights
=
CudaArray
::
create
<
double2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
CudaArray
::
create
<
double
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
CudaArray
::
create
<
double
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
create
<
double4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
vsite2AvgWeights
->
upload
(
vsite2AvgWeightVec
);
if
(
num3Avg
>
0
)
vsite3AvgWeights
->
upload
(
vsite3AvgWeightVec
);
if
(
numOutOfPlane
>
0
)
vsiteOutOfPlaneWeights
->
upload
(
vsiteOutOfPlaneWeightVec
);
if
(
numLocalCoords
>
0
)
vsiteLocalCoordsParams
->
upload
(
vsiteLocalCoordsParamVec
);
if
(
numLocalCoords
>
0
)
{
vsiteLocalCoordsWeights
->
upload
(
vsiteLocalCoordsWeightVec
);
vsiteLocalCoordsPos
->
upload
(
vsiteLocalCoordsPosVec
);
}
}
else
{
vsite2AvgWeights
=
CudaArray
::
create
<
float2
>
(
context
,
max
(
1
,
num2Avg
),
"vsite2AvgWeights"
);
vsite3AvgWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
num3Avg
),
"vsite3AvgWeights"
);
vsiteOutOfPlaneWeights
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
numOutOfPlane
),
"vsiteOutOfPlaneWeights"
);
vsiteLocalCoordsParams
=
CudaArray
::
create
<
float
>
(
context
,
max
(
1
,
12
*
numLocalCoords
),
"vsiteLocalCoordinatesParams"
);
vsiteLocalCoordsWeights
=
CudaArray
::
create
<
float
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsWeightVec
.
size
()),
"vsiteLocalCoordsWeights"
);
vsiteLocalCoordsPos
=
CudaArray
::
create
<
float4
>
(
context
,
max
(
1
,
(
int
)
vsiteLocalCoordsPosVec
.
size
()),
"vsiteLocalCoordsPos"
);
if
(
num2Avg
>
0
)
{
vector
<
float2
>
floatWeights
(
num2Avg
);
for
(
int
i
=
0
;
i
<
num2Avg
;
i
++
)
...
...
@@ -557,10 +569,14 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneWeights
->
upload
(
floatWeights
);
}
if
(
numLocalCoords
>
0
)
{
vector
<
float
>
floatParams
(
vsiteLocalCoordsParamVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsParamVec
.
size
();
i
++
)
floatParams
[
i
]
=
(
float
)
vsiteLocalCoordsParamVec
[
i
];
vsiteLocalCoordsParams
->
upload
(
floatParams
);
vector
<
float
>
floatWeights
(
vsiteLocalCoordsWeightVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsWeightVec
.
size
();
i
++
)
floatWeights
[
i
]
=
(
float
)
vsiteLocalCoordsWeightVec
[
i
];
vsiteLocalCoordsWeights
->
upload
(
floatWeights
);
vector
<
float4
>
floatPos
(
vsiteLocalCoordsPosVec
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
vsiteLocalCoordsPosVec
.
size
();
i
++
)
floatPos
[
i
]
=
make_float4
((
float
)
vsiteLocalCoordsPosVec
[
i
].
x
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
y
,
(
float
)
vsiteLocalCoordsPosVec
[
i
].
z
,
0.0
f
);
vsiteLocalCoordsPos
->
upload
(
floatPos
);
}
}
...
...
@@ -644,10 +660,16 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete
vsiteOutOfPlaneAtoms
;
if
(
vsiteOutOfPlaneWeights
!=
NULL
)
delete
vsiteOutOfPlaneWeights
;
if
(
vsiteLocalCoordsIndex
!=
NULL
)
delete
vsiteLocalCoordsIndex
;
if
(
vsiteLocalCoordsAtoms
!=
NULL
)
delete
vsiteLocalCoordsAtoms
;
if
(
vsiteLocalCoordsParams
!=
NULL
)
delete
vsiteLocalCoordsParams
;
if
(
vsiteLocalCoordsWeights
!=
NULL
)
delete
vsiteLocalCoordsWeights
;
if
(
vsiteLocalCoordsPos
!=
NULL
)
delete
vsiteLocalCoordsPos
;
if
(
vsiteLocalCoordsStartIndex
!=
NULL
)
delete
vsiteLocalCoordsStartIndex
;
}
void
CudaIntegrationUtilities
::
setNextStepSize
(
double
size
)
{
...
...
@@ -747,7 +769,9 @@ void CudaIntegrationUtilities::computeVirtualSites() {
void
*
args
[]
=
{
&
context
.
getPosq
().
getDevicePointer
(),
&
posCorrection
,
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsParams
->
getDevicePointer
()};
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
context
.
executeKernel
(
vsitePositionKernel
,
args
,
numVsites
);
}
}
...
...
@@ -759,7 +783,9 @@ void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
&
vsite2AvgAtoms
->
getDevicePointer
(),
&
vsite2AvgWeights
->
getDevicePointer
(),
&
vsite3AvgAtoms
->
getDevicePointer
(),
&
vsite3AvgWeights
->
getDevicePointer
(),
&
vsiteOutOfPlaneAtoms
->
getDevicePointer
(),
&
vsiteOutOfPlaneWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsParams
->
getDevicePointer
()};
&
vsiteLocalCoordsIndex
->
getDevicePointer
(),
&
vsiteLocalCoordsAtoms
->
getDevicePointer
(),
&
vsiteLocalCoordsWeights
->
getDevicePointer
(),
&
vsiteLocalCoordsPos
->
getDevicePointer
(),
&
vsiteLocalCoordsStartIndex
->
getDevicePointer
()};
context
.
executeKernel
(
vsiteForceKernel
,
args
,
numVsites
);
}
}
...
...
platforms/cuda/src/kernels/integrationUtilities.cu
View file @
b2222de3
...
...
@@ -680,7 +680,9 @@ extern "C" __global__ void updateCCMAAtomPositions(const int* __restrict__ numAt
extern
"C"
__global__
void
computeVirtualSites
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsParams
)
{
const
int
*
__restrict__
localCoordsIndex
,
const
int
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsWeights
,
const
real4
*
__restrict__
localCoordsPos
,
const
int
*
__restrict__
localCoordsStartIndex
)
{
// Two particle average sites.
...
...
@@ -732,30 +734,31 @@ extern "C" __global__ void computeVirtualSites(real4* __restrict__ posq, real4*
// Local coordinates sites.
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int4
atoms
=
localCoordsAtoms
[
index
];
const
real
*
params
=
&
localCoordsParams
[
12
*
index
];
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
x
);
mixed4
pos1_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
y
);
mixed4
pos2_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
z
);
mixed4
pos3_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
w
);
mixed3
pos1
=
make_mixed3
(
pos1_4
.
x
,
pos1_4
.
y
,
pos1_4
.
z
);
mixed3
pos2
=
make_mixed3
(
pos2_4
.
x
,
pos2_4
.
y
,
pos2_4
.
z
);
mixed3
pos3
=
make_mixed3
(
pos3_4
.
x
,
pos3_4
.
y
,
pos3_4
.
z
);
mixed3
originWeights
=
make_mixed3
(
params
[
0
],
params
[
1
],
params
[
2
]);
mixed3
xWeights
=
make_mixed3
(
params
[
3
],
params
[
4
],
params
[
5
]);
mixed3
yWeights
=
make_mixed3
(
params
[
6
],
params
[
7
],
params
[
8
]);
mixed3
localPosition
=
make_mixed3
(
params
[
9
],
params
[
10
],
params
[
11
]);
mixed3
origin
=
pos1
*
originWeights
.
x
+
pos2
*
originWeights
.
y
+
pos3
*
originWeights
.
z
;
mixed3
xdir
=
pos1
*
xWeights
.
x
+
pos2
*
xWeights
.
y
+
pos3
*
xWeights
.
z
;
mixed3
ydir
=
pos1
*
yWeights
.
x
+
pos2
*
yWeights
.
y
+
pos3
*
yWeights
.
z
;
int
siteAtomIndex
=
localCoordsIndex
[
index
];
int
start
=
localCoordsStartIndex
[
index
];
int
end
=
localCoordsStartIndex
[
index
+
1
];
mixed3
origin
=
make_mixed3
(
0
),
xdir
=
make_mixed3
(
0
),
ydir
=
make_mixed3
(
0
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
mixed3
pos
=
trimTo3
(
loadPos
(
posq
,
posqCorrection
,
localCoordsAtoms
[
j
]));
origin
+=
pos
*
localCoordsWeights
[
3
*
j
];
xdir
+=
pos
*
localCoordsWeights
[
3
*
j
+
1
];
ydir
+=
pos
*
localCoordsWeights
[
3
*
j
+
2
];
}
mixed3
zdir
=
cross
(
xdir
,
ydir
);
xdir
*=
rsqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
zdir
*=
rsqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
normXdir
=
sqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
normZdir
=
sqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
invNormXdir
=
(
normXdir
>
0
?
1
/
normXdir
:
0
);
mixed
invNormZdir
=
(
normZdir
>
0
?
1
/
normZdir
:
0
);
xdir
*=
invNormXdir
;
zdir
*=
invNormZdir
;
ydir
=
cross
(
zdir
,
xdir
);
real4
localPosition_4
=
localCoordsPos
[
index
];
mixed3
localPosition
=
make_mixed3
(
localPosition_4
.
x
,
localPosition_4
.
y
,
localPosition_4
.
z
);
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
siteAtomIndex
);
pos
.
x
=
origin
.
x
+
xdir
.
x
*
localPosition
.
x
+
ydir
.
x
*
localPosition
.
y
+
zdir
.
x
*
localPosition
.
z
;
pos
.
y
=
origin
.
y
+
xdir
.
y
*
localPosition
.
x
+
ydir
.
y
*
localPosition
.
y
+
zdir
.
y
*
localPosition
.
z
;
pos
.
z
=
origin
.
z
+
xdir
.
z
*
localPosition
.
x
+
ydir
.
z
*
localPosition
.
y
+
zdir
.
z
*
localPosition
.
z
;
storePos
(
posq
,
posqCorrection
,
atoms
.
x
,
pos
);
storePos
(
posq
,
posqCorrection
,
siteAtomInde
x
,
pos
);
}
}
...
...
@@ -778,7 +781,9 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
const
int4
*
__restrict__
avg2Atoms
,
const
real2
*
__restrict__
avg2Weights
,
const
int4
*
__restrict__
avg3Atoms
,
const
real4
*
__restrict__
avg3Weights
,
const
int4
*
__restrict__
outOfPlaneAtoms
,
const
real4
*
__restrict__
outOfPlaneWeights
,
const
int4
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsParams
)
{
const
int
*
__restrict__
localCoordsIndex
,
const
int
*
__restrict__
localCoordsAtoms
,
const
real
*
__restrict__
localCoordsWeights
,
const
real4
*
__restrict__
localCoordsPos
,
const
int
*
__restrict__
localCoordsStartIndex
)
{
// Two particle average sites.
...
...
@@ -826,87 +831,56 @@ extern "C" __global__ void distributeVirtualSiteForces(const real4* __restrict__
// Local coordinates sites.
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_LOCAL_COORDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int4
atoms
=
localCoordsAtoms
[
index
];
const
real
*
params
=
&
localCoordsParams
[
12
*
index
];
mixed4
pos
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
x
);
mixed4
pos1_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
y
);
mixed4
pos2_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
z
);
mixed4
pos3_4
=
loadPos
(
posq
,
posqCorrection
,
atoms
.
w
);
mixed3
pos1
=
make_mixed3
(
pos1_4
.
x
,
pos1_4
.
y
,
pos1_4
.
z
);
mixed3
pos2
=
make_mixed3
(
pos2_4
.
x
,
pos2_4
.
y
,
pos2_4
.
z
);
mixed3
pos3
=
make_mixed3
(
pos3_4
.
x
,
pos3_4
.
y
,
pos3_4
.
z
);
mixed3
originWeights
=
make_mixed3
(
params
[
0
],
params
[
1
],
params
[
2
]);
mixed3
wx
=
make_mixed3
(
params
[
3
],
params
[
4
],
params
[
5
]);
mixed3
wy
=
make_mixed3
(
params
[
6
],
params
[
7
],
params
[
8
]);
mixed3
localPosition
=
make_mixed3
(
params
[
9
],
params
[
10
],
params
[
11
]);
mixed3
origin
=
pos1
*
originWeights
.
x
+
pos2
*
originWeights
.
y
+
pos3
*
originWeights
.
z
;
mixed3
xdir
=
pos1
*
wx
.
x
+
pos2
*
wx
.
y
+
pos3
*
wx
.
z
;
mixed3
ydir
=
pos1
*
wy
.
x
+
pos2
*
wy
.
y
+
pos3
*
wy
.
z
;
int
siteAtomIndex
=
localCoordsIndex
[
index
];
int
start
=
localCoordsStartIndex
[
index
];
int
end
=
localCoordsStartIndex
[
index
+
1
];
mixed3
origin
=
make_mixed3
(
0
),
xdir
=
make_mixed3
(
0
),
ydir
=
make_mixed3
(
0
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
mixed3
pos
=
trimTo3
(
loadPos
(
posq
,
posqCorrection
,
localCoordsAtoms
[
j
]));
origin
+=
pos
*
localCoordsWeights
[
3
*
j
];
xdir
+=
pos
*
localCoordsWeights
[
3
*
j
+
1
];
ydir
+=
pos
*
localCoordsWeights
[
3
*
j
+
2
];
}
mixed3
zdir
=
cross
(
xdir
,
ydir
);
mixed
invNormXdir
=
rsqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
invNormZdir
=
rsqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
normXdir
=
sqrt
(
xdir
.
x
*
xdir
.
x
+
xdir
.
y
*
xdir
.
y
+
xdir
.
z
*
xdir
.
z
);
mixed
normZdir
=
sqrt
(
zdir
.
x
*
zdir
.
x
+
zdir
.
y
*
zdir
.
y
+
zdir
.
z
*
zdir
.
z
);
mixed
invNormXdir
=
(
normXdir
>
0
?
1
/
normXdir
:
0
);
mixed
invNormZdir
=
(
normZdir
>
0
?
1
/
normZdir
:
0
);
mixed3
dx
=
xdir
*
invNormXdir
;
mixed3
dz
=
zdir
*
invNormZdir
;
mixed3
dy
=
cross
(
dz
,
dx
);
real4
localPosition_4
=
localCoordsPos
[
index
];
mixed3
localPosition
=
make_mixed3
(
localPosition_4
.
x
,
localPosition_4
.
y
,
localPosition_4
.
z
);
// The derivatives for this case are very complicated. They were computed with SymPy then simplified by hand.
mixed
t11
=
(
wx
.
x
*
ydir
.
x
-
wy
.
x
*
xdir
.
x
)
*
invNormZdir
;
mixed
t12
=
(
wx
.
x
*
ydir
.
y
-
wy
.
x
*
xdir
.
y
)
*
invNormZdir
;
mixed
t13
=
(
wx
.
x
*
ydir
.
z
-
wy
.
x
*
xdir
.
z
)
*
invNormZdir
;
mixed
t21
=
(
wx
.
y
*
ydir
.
x
-
wy
.
y
*
xdir
.
x
)
*
invNormZdir
;
mixed
t22
=
(
wx
.
y
*
ydir
.
y
-
wy
.
y
*
xdir
.
y
)
*
invNormZdir
;
mixed
t23
=
(
wx
.
y
*
ydir
.
z
-
wy
.
y
*
xdir
.
z
)
*
invNormZdir
;
mixed
t31
=
(
wx
.
z
*
ydir
.
x
-
wy
.
z
*
xdir
.
x
)
*
invNormZdir
;
mixed
t32
=
(
wx
.
z
*
ydir
.
y
-
wy
.
z
*
xdir
.
y
)
*
invNormZdir
;
mixed
t33
=
(
wx
.
z
*
ydir
.
z
-
wy
.
z
*
xdir
.
z
)
*
invNormZdir
;
mixed
sx1
=
t13
*
dz
.
y
-
t12
*
dz
.
z
;
mixed
sy1
=
t11
*
dz
.
z
-
t13
*
dz
.
x
;
mixed
sz1
=
t12
*
dz
.
x
-
t11
*
dz
.
y
;
mixed
sx2
=
t23
*
dz
.
y
-
t22
*
dz
.
z
;
mixed
sy2
=
t21
*
dz
.
z
-
t23
*
dz
.
x
;
mixed
sz2
=
t22
*
dz
.
x
-
t21
*
dz
.
y
;
mixed
sx3
=
t33
*
dz
.
y
-
t32
*
dz
.
z
;
mixed
sy3
=
t31
*
dz
.
z
-
t33
*
dz
.
x
;
mixed
sz3
=
t32
*
dz
.
x
-
t31
*
dz
.
y
;
mixed3
wxScaled
=
wx
*
invNormXdir
;
real3
f
=
loadForce
(
atoms
.
x
,
force
);
real3
f
=
loadForce
(
siteAtomIndex
,
force
);
mixed3
fp1
=
localPosition
*
f
.
x
;
mixed3
fp2
=
localPosition
*
f
.
y
;
mixed3
fp3
=
localPosition
*
f
.
z
;
real3
f1
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
f1
.
x
+=
fp1
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx1
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
x
+
dy
.
x
*
sx1
-
dx
.
y
*
t12
-
dx
.
z
*
t13
)
+
f
.
x
*
originWeights
.
x
;
f1
.
y
+=
fp1
.
x
*
wxScaled
.
x
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy1
+
t13
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
x
+
dy
.
x
*
sy1
+
dx
.
y
*
t11
);
f1
.
z
+=
fp1
.
x
*
wxScaled
.
x
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz1
-
t12
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
x
+
dy
.
x
*
sz1
+
dx
.
z
*
t11
);
f2
.
x
+=
fp1
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx2
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
y
+
dy
.
x
*
sx2
-
dx
.
y
*
t22
-
dx
.
z
*
t23
)
+
f
.
x
*
originWeights
.
y
;
f2
.
y
+=
fp1
.
x
*
wxScaled
.
y
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy2
+
t23
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
y
+
dy
.
x
*
sy2
+
dx
.
y
*
t21
);
f2
.
z
+=
fp1
.
x
*
wxScaled
.
y
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz2
-
t22
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
y
+
dy
.
x
*
sz2
+
dx
.
z
*
t21
);
f3
.
x
+=
fp1
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx3
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
.
z
+
dy
.
x
*
sx3
-
dx
.
y
*
t32
-
dx
.
z
*
t33
)
+
f
.
x
*
originWeights
.
z
;
f3
.
y
+=
fp1
.
x
*
wxScaled
.
z
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy3
+
t33
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
.
z
+
dy
.
x
*
sy3
+
dx
.
y
*
t31
);
f3
.
z
+=
fp1
.
x
*
wxScaled
.
z
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz3
-
t32
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
.
z
+
dy
.
x
*
sz3
+
dx
.
z
*
t31
);
f1
.
x
+=
fp2
.
x
*
wxScaled
.
x
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx1
-
t13
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
x
-
dy
.
y
*
sx1
-
dx
.
x
*
t12
);
f1
.
y
+=
fp2
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy1
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
x
-
dy
.
y
*
sy1
+
dx
.
x
*
t11
+
dx
.
z
*
t13
)
+
f
.
y
*
originWeights
.
x
;
f1
.
z
+=
fp2
.
x
*
wxScaled
.
x
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz1
+
t11
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
x
-
dy
.
y
*
sz1
-
dx
.
z
*
t12
);
f2
.
x
+=
fp2
.
x
*
wxScaled
.
y
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx2
-
t23
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
y
-
dy
.
y
*
sx2
-
dx
.
x
*
t22
);
f2
.
y
+=
fp2
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy2
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
y
-
dy
.
y
*
sy2
+
dx
.
x
*
t21
+
dx
.
z
*
t23
)
+
f
.
y
*
originWeights
.
y
;
f2
.
z
+=
fp2
.
x
*
wxScaled
.
y
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz2
+
t21
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
y
-
dy
.
y
*
sz2
-
dx
.
z
*
t22
);
f3
.
x
+=
fp2
.
x
*
wxScaled
.
z
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx3
-
t33
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
.
z
-
dy
.
y
*
sx3
-
dx
.
x
*
t32
);
f3
.
y
+=
fp2
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy3
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
.
z
-
dy
.
y
*
sy3
+
dx
.
x
*
t31
+
dx
.
z
*
t33
)
+
f
.
y
*
originWeights
.
z
;
f3
.
z
+=
fp2
.
x
*
wxScaled
.
z
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz3
+
t31
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
.
z
-
dy
.
y
*
sz3
-
dx
.
z
*
t32
);
f1
.
x
+=
fp3
.
x
*
wxScaled
.
x
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx1
+
t12
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
x
+
dy
.
z
*
sx1
+
dx
.
x
*
t13
);
f1
.
y
+=
fp3
.
x
*
wxScaled
.
x
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy1
-
t11
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
x
+
dy
.
z
*
sy1
+
dx
.
y
*
t13
);
f1
.
z
+=
fp3
.
x
*
wxScaled
.
x
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz1
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
x
+
dy
.
z
*
sz1
-
dx
.
x
*
t11
-
dx
.
y
*
t12
)
+
f
.
z
*
originWeights
.
x
;
f2
.
x
+=
fp3
.
x
*
wxScaled
.
y
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx2
+
t22
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
y
+
dy
.
z
*
sx2
+
dx
.
x
*
t23
);
f2
.
y
+=
fp3
.
x
*
wxScaled
.
y
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy2
-
t21
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
y
+
dy
.
z
*
sy2
+
dx
.
y
*
t23
);
f2
.
z
+=
fp3
.
x
*
wxScaled
.
y
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz2
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
y
+
dy
.
z
*
sz2
-
dx
.
x
*
t21
-
dx
.
y
*
t22
)
+
f
.
z
*
originWeights
.
y
;
f3
.
x
+=
fp3
.
x
*
wxScaled
.
z
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx3
+
t32
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
.
z
+
dy
.
z
*
sx3
+
dx
.
x
*
t33
);
f3
.
y
+=
fp3
.
x
*
wxScaled
.
z
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy3
-
t31
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
.
z
+
dy
.
z
*
sy3
+
dx
.
y
*
t33
);
f3
.
z
+=
fp3
.
x
*
wxScaled
.
z
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz3
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
.
z
+
dy
.
z
*
sz3
-
dx
.
x
*
t31
-
dx
.
y
*
t32
)
+
f
.
z
*
originWeights
.
z
;
addForce
(
atoms
.
y
,
force
,
f1
);
addForce
(
atoms
.
z
,
force
,
f2
);
addForce
(
atoms
.
w
,
force
,
f3
);
for
(
int
j
=
start
;
j
<
end
;
j
++
)
{
real
originWeight
=
localCoordsWeights
[
3
*
j
];
real
wx
=
localCoordsWeights
[
3
*
j
+
1
];
real
wy
=
localCoordsWeights
[
3
*
j
+
2
];
mixed
wxScaled
=
wx
*
invNormXdir
;
mixed
t1
=
(
wx
*
ydir
.
x
-
wy
*
xdir
.
x
)
*
invNormZdir
;
mixed
t2
=
(
wx
*
ydir
.
y
-
wy
*
xdir
.
y
)
*
invNormZdir
;
mixed
t3
=
(
wx
*
ydir
.
z
-
wy
*
xdir
.
z
)
*
invNormZdir
;
mixed
sx
=
t3
*
dz
.
y
-
t2
*
dz
.
z
;
mixed
sy
=
t1
*
dz
.
z
-
t3
*
dz
.
x
;
mixed
sz
=
t2
*
dz
.
x
-
t1
*
dz
.
y
;
real3
fresult
=
make_real3
(
0
);
fresult
.
x
+=
fp1
.
x
*
wxScaled
*
(
1
-
dx
.
x
*
dx
.
x
)
+
fp1
.
z
*
(
dz
.
x
*
sx
)
+
fp1
.
y
*
((
-
dx
.
x
*
dy
.
x
)
*
wxScaled
+
dy
.
x
*
sx
-
dx
.
y
*
t2
-
dx
.
z
*
t3
)
+
f
.
x
*
originWeight
;
fresult
.
y
+=
fp1
.
x
*
wxScaled
*
(
-
dx
.
x
*
dx
.
y
)
+
fp1
.
z
*
(
dz
.
x
*
sy
+
t3
)
+
fp1
.
y
*
((
-
dx
.
y
*
dy
.
x
-
dz
.
z
)
*
wxScaled
+
dy
.
x
*
sy
+
dx
.
y
*
t1
);
fresult
.
z
+=
fp1
.
x
*
wxScaled
*
(
-
dx
.
x
*
dx
.
z
)
+
fp1
.
z
*
(
dz
.
x
*
sz
-
t2
)
+
fp1
.
y
*
((
-
dx
.
z
*
dy
.
x
+
dz
.
y
)
*
wxScaled
+
dy
.
x
*
sz
+
dx
.
z
*
t1
);
fresult
.
x
+=
fp2
.
x
*
wxScaled
*
(
-
dx
.
y
*
dx
.
x
)
+
fp2
.
z
*
(
dz
.
y
*
sx
-
t3
)
-
fp2
.
y
*
((
dx
.
x
*
dy
.
y
-
dz
.
z
)
*
wxScaled
-
dy
.
y
*
sx
-
dx
.
x
*
t2
);
fresult
.
y
+=
fp2
.
x
*
wxScaled
*
(
1
-
dx
.
y
*
dx
.
y
)
+
fp2
.
z
*
(
dz
.
y
*
sy
)
-
fp2
.
y
*
((
dx
.
y
*
dy
.
y
)
*
wxScaled
-
dy
.
y
*
sy
+
dx
.
x
*
t1
+
dx
.
z
*
t3
)
+
f
.
y
*
originWeight
;
fresult
.
z
+=
fp2
.
x
*
wxScaled
*
(
-
dx
.
y
*
dx
.
z
)
+
fp2
.
z
*
(
dz
.
y
*
sz
+
t1
)
-
fp2
.
y
*
((
dx
.
z
*
dy
.
y
+
dz
.
x
)
*
wxScaled
-
dy
.
y
*
sz
-
dx
.
z
*
t2
);
fresult
.
x
+=
fp3
.
x
*
wxScaled
*
(
-
dx
.
z
*
dx
.
x
)
+
fp3
.
z
*
(
dz
.
z
*
sx
+
t2
)
+
fp3
.
y
*
((
-
dx
.
x
*
dy
.
z
-
dz
.
y
)
*
wxScaled
+
dy
.
z
*
sx
+
dx
.
x
*
t3
);
fresult
.
y
+=
fp3
.
x
*
wxScaled
*
(
-
dx
.
z
*
dx
.
y
)
+
fp3
.
z
*
(
dz
.
z
*
sy
-
t1
)
+
fp3
.
y
*
((
-
dx
.
y
*
dy
.
z
+
dz
.
x
)
*
wxScaled
+
dy
.
z
*
sy
+
dx
.
y
*
t3
);
fresult
.
z
+=
fp3
.
x
*
wxScaled
*
(
1
-
dx
.
z
*
dx
.
z
)
+
fp3
.
z
*
(
dz
.
z
*
sz
)
+
fp3
.
y
*
((
-
dx
.
z
*
dy
.
z
)
*
wxScaled
+
dy
.
z
*
sz
-
dx
.
x
*
t1
-
dx
.
y
*
t2
)
+
f
.
z
*
originWeight
;
addForce
(
localCoordsAtoms
[
j
],
force
,
fresult
);
}
}
}
...
...
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
b2222de3
...
...
@@ -527,7 +527,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneWeightVec
.
push_back
(
mm_double4
(
site
.
getWeight12
(),
site
.
getWeight13
(),
site
.
getWeightCross
(),
0.0
));
}
else
if
(
dynamic_cast
<
const
LocalCoordinatesSite
*>
(
&
system
.
getVirtualSite
(
i
))
!=
NULL
)
{
// A
n out of plane
site.
// A
local coordinates
site.
const
LocalCoordinatesSite
&
site
=
dynamic_cast
<
const
LocalCoordinatesSite
&>
(
system
.
getVirtualSite
(
i
));
int
numParticles
=
site
.
getNumParticles
();
...
...
platforms/opencl/src/kernels/virtualSites.cl
View file @
b2222de3
...
...
@@ -101,7 +101,7 @@ __kernel void computeVirtualSites(__global real4* restrict posq,
xdir
*=
invNormXdir
;
zdir
*=
invNormZdir
;
ydir
=
cross
(
zdir,
xdir
)
;
mixed3
localPosition
=
localCoordsPos[index].xyz
;
mixed3
localPosition
=
convert_mixed4
(
localCoordsPos[index]
)
.
xyz
;
mixed4
pos
=
loadPos
(
posq,
posqCorrection,
siteAtomIndex
)
;
pos.x
=
origin.x
+
xdir.x*localPosition.x
+
ydir.x*localPosition.y
+
zdir.x*localPosition.z
;
pos.y
=
origin.y
+
xdir.y*localPosition.x
+
ydir.y*localPosition.y
+
zdir.y*localPosition.z
;
...
...
@@ -247,7 +247,7 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
mixed3
dx
=
xdir*invNormXdir
;
mixed3
dz
=
zdir*invNormZdir
;
mixed3
dy
=
cross
(
dz,
dx
)
;
mixed3
localPosition
=
localCoordsPos[index].xyz
;
mixed3
localPosition
=
convert_mixed4
(
localCoordsPos[index]
)
.
xyz
;
//
The
derivatives
for
this
case
are
very
complicated.
They
were
computed
with
SymPy
then
simplified
by
hand.
...
...
@@ -266,7 +266,7 @@ __kernel void distributeForces(__global const real4* restrict posq, __global rea
mixed
sx
=
t3*dz.y-t2*dz.z
;
mixed
sy
=
t1*dz.z-t3*dz.x
;
mixed
sz
=
t2*dz.x-t1*dz.y
;
mixed
4
fresult
=
0
;
real
4
fresult
=
0
;
fresult.x
+=
fp1.x*wxScaled*
(
1-dx.x*dx.x
)
+
fp1.z*
(
dz.x*sx
)
+
fp1.y*
((
-dx.x*dy.x
)
*wxScaled
+
dy.x*sx
-
dx.y*t2
-
dx.z*t3
)
+
f.x*originWeight
;
fresult.y
+=
fp1.x*wxScaled*
(
-dx.x*dx.y
)
+
fp1.z*
(
dz.x*sy+t3
)
+
fp1.y*
((
-dx.y*dy.x-dz.z
)
*wxScaled
+
dy.x*sy
+
dx.y*t1
)
;
fresult.z
+=
fp1.x*wxScaled*
(
-dx.x*dx.z
)
+
fp1.z*
(
dz.x*sz-t2
)
+
fp1.y*
((
-dx.z*dy.x+dz.y
)
*wxScaled
+
dy.x*sz
+
dx.z*t1
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment