Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
72ab8864
Commit
72ab8864
authored
Aug 19, 2015
by
peastman
Browse files
Continuing CUDA implementation of spherical harmonics for multipoles
parent
fa21870f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
537 additions
and
143 deletions
+537
-143
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+4
-5
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+30
-30
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
.../platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+391
-107
plugins/amoeba/platforms/cuda/src/kernels/sphericalMultipoles.cu
.../amoeba/platforms/cuda/src/kernels/sphericalMultipoles.cu
+111
-0
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
...ence/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
72ab8864
...
...
@@ -1150,6 +1150,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if
(
maxInducedIterations
>
0
)
{
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
defines
[
"USE_MUTUAL_POLARIZATION"
]
=
"1"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
...
...
@@ -1159,10 +1160,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
stringstream
electrostaticsSource
;
if
(
usePME
)
{
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
sphericalMultipoles
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
pmeMultipoleElectrostatics
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
pmeElectrostaticPairForce
:
CudaAmoebaKernelSources
::
pmeElectrostaticPairForceNoQuadrupoles
);
electrostaticsSource
<<
"#define APPLY_SCALE
\n
"
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
pmeElectrostaticPairForce
:
CudaAmoebaKernelSources
::
pmeElectrostaticPairForceNoQuadrupoles
);
electrostaticsThreadMemory
=
24
*
elementSize
+
3
*
sizeof
(
float
)
+
3
*
sizeof
(
int
)
/
(
double
)
cu
.
TileSize
;
if
(
!
useShuffle
)
electrostaticsThreadMemory
+=
3
*
elementSize
;
...
...
@@ -1659,8 +1658,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDi
pole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
sphericalDipoles
->
getDevicePointer
(),
&
sphericalQuadru
pole
s
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
void
*
pmeTransformInducedPotentialArgs
[]
=
{
&
pmePhidp
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformInducedPotentialArgs
,
cu
.
getNumAtoms
());
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
View file @
72ab8864
...
...
@@ -10,10 +10,10 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalDipoles
[
offset
+
2
]
=
molecularDipoles
[
offset
+
1
];
// y -> Q_11s
offset
=
5
*
atom
;
sphericalQuadrupoles
[
offset
+
0
]
=
-
3.0
f
*
(
molecularQuadrupoles
[
offset
+
0
]
+
molecularQuadrupoles
[
offset
+
3
]);
// zz -> Q_20
sphericalQuadrupoles
[
offset
+
1
]
=
(
2
*
SQRT
(
3
))
*
molecularQuadrupoles
[
offset
+
2
];
// xz -> Q_21c
sphericalQuadrupoles
[
offset
+
2
]
=
(
2
*
SQRT
(
3
))
*
molecularQuadrupoles
[
offset
+
4
];
// yz -> Q_21s
sphericalQuadrupoles
[
offset
+
3
]
=
SQRT
(
3
)
*
(
molecularQuadrupoles
[
offset
+
0
]
-
molecularQuadrupoles
[
offset
+
3
]);
// xx-yy -> Q_22c
sphericalQuadrupoles
[
offset
+
4
]
=
(
2
*
SQRT
(
3
))
*
molecularQuadrupoles
[
offset
+
1
];
// xy -> Q_22s
sphericalQuadrupoles
[
offset
+
1
]
=
(
2
*
SQRT
(
(
real
)
3
))
*
molecularQuadrupoles
[
offset
+
2
];
// xz -> Q_21c
sphericalQuadrupoles
[
offset
+
2
]
=
(
2
*
SQRT
(
(
real
)
3
))
*
molecularQuadrupoles
[
offset
+
4
];
// yz -> Q_21s
sphericalQuadrupoles
[
offset
+
3
]
=
SQRT
(
(
real
)
3
)
*
(
molecularQuadrupoles
[
offset
+
0
]
-
molecularQuadrupoles
[
offset
+
3
]);
// xx-yy -> Q_22c
sphericalQuadrupoles
[
offset
+
4
]
=
(
2
*
SQRT
(
(
real
)
3
))
*
molecularQuadrupoles
[
offset
+
1
];
// xy -> Q_22s
// get coordinates of this atom and the z & x axis atoms
// compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
...
...
@@ -236,32 +236,32 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalQuadrupole
[
4
]
*=
-
1
;
}
real
rotatedQuadrupole
[
5
]
=
{
0
,
0
,
0
,
0
,
0
};
real
sqrtThree
=
SQRT
(
3
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
(
3.0
f
*
vectorZ
.
z
*
vectorZ
.
z
-
1.0
f
)
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Z
.
x
;
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Z
.
y
;
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
sqrtThree
*
(
vector
Z
.
x
*
vector
Z
.
x
-
vector
Z
.
y
*
vector
Z
.
y
);
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vector
Z
.
x
*
vector
Z
.
y
;
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
1
]
*
sqrtThree
*
vectorZ
.
z
*
vector
X
.
z
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
x
*
vectorX
.
z
+
vectorZ
.
z
*
vectorX
.
x
)
;
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
y
*
vector
X
.
z
+
vectorZ
.
z
*
vector
X
.
y
);
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
1
]
*
(
vector
Z
.
x
*
vectorX
.
x
-
vector
Z
.
y
*
vector
X
.
y
);
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
1
]
*
(
vector
Z
.
y
*
vector
X
.
x
+
vector
Z
.
x
*
vector
X
.
y
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
2
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Y
.
z
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
x
*
vector
Y
.
z
+
vectorZ
.
z
*
vector
Y
.
x
);
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
y
*
vectorY
.
z
+
vectorZ
.
z
*
vectorY
.
y
)
;
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
2
]
*
(
vector
Z
.
x
*
vector
Y
.
x
-
vector
Z
.
y
*
vectorY
.
y
)
;
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
2
]
*
(
vector
Z
.
y
*
vectorY
.
x
+
vector
Z
.
x
*
vectorY
.
y
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
3
]
*
0.5
f
*
sqrtThree
*
(
vector
X
.
z
*
vector
X
.
z
-
vector
Y
.
z
*
vector
Y
.
z
);
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
3
]
*
(
vector
X
.
z
*
vectorX
.
x
-
vector
Y
.
z
*
vector
Y
.
x
);
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
3
]
*
(
vector
X
.
z
*
vector
X
.
y
-
vector
Y
.
z
*
vectorY
.
y
)
;
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
3
]
*
0.5
f
*
(
vectorX
.
x
*
vectorX
.
x
-
vectorX
.
y
*
vectorX
.
y
-
vectorY
.
x
*
vectorY
.
x
+
vectorY
.
y
*
vectorY
.
y
)
;
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
3
]
*
(
vectorX
.
x
*
vector
X
.
y
-
vector
Y
.
x
*
vectorY
.
y
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
4
]
*
sqrtThree
*
vector
X
.
z
*
vector
Y
.
z
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
4
]
*
(
vector
X
.
x
*
vector
Y
.
z
+
vector
X
.
z
*
vector
Y
.
x
);
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
4
]
*
(
vector
X
.
y
*
vectorY
.
z
+
vector
X
.
z
*
vectorY
.
y
)
;
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
4
]
*
(
vectorX
.
x
*
vector
Y
.
x
-
vector
X
.
y
*
vectorY
.
y
)
;
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
4
]
*
(
vectorX
.
y
*
vectorY
.
x
+
vectorX
.
x
*
vectorY
.
y
);
real
sqrtThree
=
SQRT
(
(
real
)
3
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
(
3.0
f
*
vectorZ
.
z
*
vectorZ
.
z
-
1.0
f
)
+
sphericalQuadrupole
[
1
]
*
sqrtThree
*
vectorZ
.
z
*
vector
X
.
z
+
sphericalQuadrupole
[
2
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Y
.
z
+
sphericalQuadrupole
[
3
]
*
0.5
f
*
sqrtThree
*
(
vector
X
.
z
*
vector
X
.
z
-
vector
Y
.
z
*
vector
Y
.
z
)
+
sphericalQuadrupole
[
4
]
*
sqrtThree
*
vector
X
.
z
*
vector
Y
.
z
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Z
.
x
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
x
*
vectorX
.
z
+
vectorZ
.
z
*
vectorX
.
x
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
x
*
vector
Y
.
z
+
vectorZ
.
z
*
vector
Y
.
x
)
+
sphericalQuadrupole
[
3
]
*
(
vector
X
.
z
*
vectorX
.
x
-
vector
Y
.
z
*
vector
Y
.
x
)
+
sphericalQuadrupole
[
4
]
*
(
vector
X
.
x
*
vector
Y
.
z
+
vector
X
.
z
*
vector
Y
.
x
);
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vector
Z
.
y
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
y
*
vector
X
.
z
+
vectorZ
.
z
*
vector
X
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
y
*
vectorY
.
z
+
vectorZ
.
z
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
(
vector
X
.
z
*
vector
X
.
y
-
vector
Y
.
z
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vector
X
.
y
*
vectorY
.
z
+
vector
X
.
z
*
vectorY
.
y
);
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
sqrtThree
*
(
vector
Z
.
x
*
vector
Z
.
x
-
vector
Z
.
y
*
vector
Z
.
y
)
+
sphericalQuadrupole
[
1
]
*
(
vector
Z
.
x
*
vectorX
.
x
-
vector
Z
.
y
*
vector
X
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vector
Z
.
x
*
vector
Y
.
x
-
vector
Z
.
y
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
0.5
f
*
(
vectorX
.
x
*
vectorX
.
x
-
vectorX
.
y
*
vectorX
.
y
-
vectorY
.
x
*
vectorY
.
x
+
vectorY
.
y
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
x
*
vector
Y
.
x
-
vector
X
.
y
*
vectorY
.
y
);
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vector
Z
.
x
*
vector
Z
.
y
+
sphericalQuadrupole
[
1
]
*
(
vector
Z
.
y
*
vector
X
.
x
+
vector
Z
.
x
*
vector
X
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vector
Z
.
y
*
vectorY
.
x
+
vector
Z
.
x
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
(
vectorX
.
x
*
vector
X
.
y
-
vector
Y
.
x
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
y
*
vectorY
.
x
+
vectorX
.
x
*
vectorY
.
y
);
sphericalQuadrupoles
[
offset
]
=
rotatedQuadrupole
[
0
];
sphericalQuadrupoles
[
offset
+
1
]
=
rotatedQuadrupole
[
1
];
sphericalQuadrupoles
[
offset
+
2
]
=
rotatedQuadrupole
[
2
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
72ab8864
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real3
pos
,
force
,
torque
,
dipole
,
inducedDipole
,
inducedDipolePolar
;
real3
pos
,
force
,
torque
,
inducedDipole
,
inducedDipolePolar
,
sphericalDipole
;
real
q
;
float
thole
,
damp
;
#ifdef INCLUDE_QUADRUPOLES
real
quadrupoleXX
,
quadrupoleXY
,
quadrupoleXZ
,
quadrupoleYY
,
quadrupoleYZ
;
float
padding
;
real
sphericalQuadrupole
[
5
];
#endif
}
AtomData
;
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionF2
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
,
float
dScale
,
float
pScale
,
float
mScale
);
__device__
void
computeOneInteractionT2
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
,
float
dScale
,
float
pScale
,
float
mScale
);
__device__
void
computeOneInteractionF1NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionF2NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionT1NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
);
__device__
void
computeOneInteractionT2NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
);
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
sphericalDipole
,
const
real
*
__restrict__
sphericalQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
real4
atomPosq
=
posq
[
atom
];
data
.
pos
=
make_real3
(
atomPosq
.
x
,
atomPosq
.
y
,
atomPosq
.
z
);
data
.
q
=
atomPosq
.
w
;
data
.
d
ipole
.
x
=
labFrame
Dipole
[
atom
*
3
];
data
.
d
ipole
.
y
=
labFrame
Dipole
[
atom
*
3
+
1
];
data
.
d
ipole
.
z
=
labFrame
Dipole
[
atom
*
3
+
2
];
data
.
sphericalD
ipole
.
x
=
spherical
Dipole
[
atom
*
3
];
data
.
sphericalD
ipole
.
y
=
spherical
Dipole
[
atom
*
3
+
1
];
data
.
sphericalD
ipole
.
z
=
spherical
Dipole
[
atom
*
3
+
2
];
#ifdef INCLUDE_QUADRUPOLES
data
.
quadrupoleXX
=
labFrame
Quadrupole
[
atom
*
5
];
data
.
quadrupoleXY
=
labFrame
Quadrupole
[
atom
*
5
+
1
];
data
.
quadrupoleXZ
=
labFrame
Quadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrame
Quadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrame
Quadrupole
[
atom
*
5
+
4
];
data
.
sphericalQuadrupole
[
0
]
=
spherical
Quadrupole
[
atom
*
5
];
data
.
sphericalQuadrupole
[
1
]
=
spherical
Quadrupole
[
atom
*
5
+
1
];
data
.
sphericalQuadrupole
[
2
]
=
spherical
Quadrupole
[
atom
*
5
+
2
];
data
.
sphericalQuadrupole
[
3
]
=
spherical
Quadrupole
[
atom
*
5
+
3
];
data
.
sphericalQuadrupole
[
4
]
=
spherical
Quadrupole
[
atom
*
5
+
4
];
#endif
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
...
...
@@ -66,7 +57,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real
4
delta
;
real
3
delta
;
delta
.
x
=
atom2
.
pos
.
x
-
atom1
.
pos
.
x
;
delta
.
y
=
atom2
.
pos
.
y
-
atom1
.
pos
.
y
;
delta
.
z
=
atom2
.
pos
.
z
-
atom1
.
pos
.
z
;
...
...
@@ -74,113 +65,405 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
// periodic box
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
CUTOFF_SQUARED
)
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
>
CUTOFF_SQUARED
)
return
;
real
r
=
SQRT
(
delta
.
w
);
real
ralpha
=
EWALD_ALPHA
*
r
;
real
rInv
=
RSQRT
(
r2
);
real
r
=
r2
*
rInv
;
real
qiRotationMatrix
[
3
][
3
];
buildQIRotationMatrix
(
delta
,
rInv
,
qiRotationMatrix
);
real3
qiUindI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipole
.
z
);
real3
qiUindJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipole
.
z
);
real3
qiUinpI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipolePolar
.
z
);
real3
qiUinpJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipolePolar
.
z
);
real3
rotatedDipole1
=
rotateDipole
(
atom1
.
sphericalDipole
,
qiRotationMatrix
);
real3
rotatedDipole2
=
rotateDipole
(
atom2
.
sphericalDipole
,
qiRotationMatrix
);
real
rotatedQuadrupole1
[]
=
{
0
,
0
,
0
,
0
,
0
};
real
rotatedQuadrupole2
[]
=
{
0
,
0
,
0
,
0
,
0
};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles
(
qiRotationMatrix
,
atom1
.
sphericalQuadrupole
,
atom2
.
sphericalQuadrupole
,
rotatedQuadrupole1
,
rotatedQuadrupole2
);
#endif
real
qiQI
[
9
]
=
{
atom1
.
q
,
rotatedDipole1
.
x
,
rotatedDipole1
.
y
,
rotatedDipole1
.
z
,
rotatedQuadrupole1
[
0
],
rotatedQuadrupole1
[
1
],
rotatedQuadrupole1
[
2
],
rotatedQuadrupole1
[
3
],
rotatedQuadrupole1
[
4
]};
real
qiQJ
[
9
]
=
{
atom2
.
q
,
rotatedDipole2
.
x
,
rotatedDipole2
.
y
,
rotatedDipole2
.
z
,
rotatedQuadrupole2
[
0
],
rotatedQuadrupole2
[
1
],
rotatedQuadrupole2
[
2
],
rotatedQuadrupole2
[
3
],
rotatedQuadrupole2
[
4
]};
// The Qtilde{x,y,z} torque intermediates for atoms I and J, which are used to obtain the torques on the permanent moments.
const
real
sqrtThree
=
SQRT
((
real
)
3
);
real
qiQIX
[
9
]
=
{
0
,
qiQI
[
3
],
0
,
-
qiQI
[
1
],
sqrtThree
*
qiQI
[
6
],
qiQI
[
8
],
-
sqrtThree
*
qiQI
[
4
]
-
qiQI
[
7
],
qiQI
[
6
],
-
qiQI
[
5
]};
real
qiQIY
[
9
]
=
{
0
,
-
qiQI
[
2
],
qiQI
[
1
],
0
,
-
sqrtThree
*
qiQI
[
5
],
sqrtThree
*
qiQI
[
4
]
-
qiQI
[
7
],
-
qiQI
[
8
],
qiQI
[
5
],
qiQI
[
6
]};
real
qiQIZ
[
9
]
=
{
0
,
0
,
-
qiQI
[
3
],
qiQI
[
2
],
0
,
-
qiQI
[
6
],
qiQI
[
5
],
-
2
*
qiQI
[
8
],
2
*
qiQI
[
7
]};
real
qiQJX
[
9
]
=
{
0
,
qiQJ
[
3
],
0
,
-
qiQJ
[
1
],
sqrtThree
*
qiQJ
[
6
],
qiQJ
[
8
],
-
sqrtThree
*
qiQJ
[
4
]
-
qiQJ
[
7
],
qiQJ
[
6
],
-
qiQJ
[
5
]};
real
qiQJY
[
9
]
=
{
0
,
-
qiQJ
[
2
],
qiQJ
[
1
],
0
,
-
sqrtThree
*
qiQJ
[
5
],
sqrtThree
*
qiQJ
[
4
]
-
qiQJ
[
7
],
-
qiQJ
[
8
],
qiQJ
[
5
],
qiQJ
[
6
]};
real
qiQJZ
[
9
]
=
{
0
,
0
,
-
qiQJ
[
3
],
qiQJ
[
2
],
0
,
-
qiQJ
[
6
],
qiQJ
[
5
],
-
2
*
qiQJ
[
8
],
2
*
qiQJ
[
7
]};
real
alsq2
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
alsq2n
=
0
;
if
(
EWALD_ALPHA
>
0
)
alsq2n
=
RECIP
(
SQRT_PI
*
EWALD_ALPHA
);
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real
Vij
[
9
],
Vji
[
9
],
VjiR
[
9
],
VijR
[
9
];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real
Vijp
[
3
],
Vijd
[
3
],
Vjip
[
3
],
Vjid
[
3
];
real
rInvVec
[
7
],
alphaRVec
[
8
],
bVec
[
5
];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec
[
1
]
=
rInv
;
for
(
int
i
=
2
;
i
<
7
;
++
i
)
rInvVec
[
i
]
=
rInvVec
[
i
-
1
]
*
rInv
;
// The alpharVec array is defined such that the ith element is (alpha R)^i,
// where kappa (alpha in OpenMM parlance) is the Ewald attenuation parameter.
real
ralpha
=
EWALD_ALPHA
*
r
;
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
real
rr1
=
RECIP
(
r
);
delta
.
w
=
rr1
;
#ifdef USE_DOUBLE_PRECISION
const
real
erf
c
AlphaR
=
erf
c
(
ralpha
);
const
real
erfAlphaR
=
erf
(
ralpha
);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const
real
t
=
RECIP
(
1.0
f
+
0.3275911
f
*
ralpha
);
const
real
erf
c
AlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
const
real
erfAlphaR
=
1
-
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
#endif
real
bn0
=
erfcAlphaR
*
rr1
;
energy
+=
forceFactor
*
atom1
.
q
*
atom2
.
q
*
bn0
;
real
rr2
=
rr1
*
rr1
;
alsq2n
*=
alsq2
;
real4
bn
;
bn
.
x
=
(
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
y
=
(
3
*
bn
.
x
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
z
=
(
5
*
bn
.
y
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
w
=
(
7
*
bn
.
z
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
real
bn5
=
(
9
*
bn
.
w
+
alsq2n
*
exp2a
)
*
rr2
;
alphaRVec
[
1
]
=
ralpha
;
for
(
int
i
=
2
;
i
<
8
;
++
i
)
alphaRVec
[
i
]
=
alphaRVec
[
i
-
1
]
*
ralpha
;
real3
force
;
real
X
=
2
*
EXP
(
-
alphaRVec
[
2
])
/
SQRT_PI
;
real
uScale
=
1
;
// Delete!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
if
(
hasExclusions
)
{
computeOneInteractionF1
(
atom1
,
atom2
,
delta
,
bn
,
bn5
,
forceFactor
,
dScale
,
pScale
,
mScale
,
force
,
energy
);
computeOneInteractionF2
(
atom1
,
atom2
,
delta
,
bn
,
forceFactor
,
dScale
,
pScale
,
mScale
,
force
,
energy
);
}
else
{
computeOneInteractionF1NoScale
(
atom1
,
atom2
,
delta
,
bn
,
bn5
,
forceFactor
,
force
,
energy
);
computeOneInteractionF2NoScale
(
atom1
,
atom2
,
delta
,
bn
,
forceFactor
,
force
,
energy
);
int
doubleFactorial
=
1
,
facCount
=
1
;
real
tmp
=
alphaRVec
[
1
];
bVec
[
1
]
=
-
erfAlphaR
;
for
(
int
i
=
2
;
i
<
5
;
++
i
){
bVec
[
i
]
=
bVec
[
i
-
1
]
+
tmp
*
X
/
(
real
)(
doubleFactorial
);
facCount
=
facCount
+
2
;
doubleFactorial
=
doubleFactorial
*
facCount
;
tmp
*=
2
*
alphaRVec
[
2
];
}
atom1
.
force
+=
force
;
if
(
forceFactor
==
1
)
atom2
.
force
-=
force
;
if
(
hasExclusions
)
{
computeOneInteractionT1
(
atom1
,
atom2
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
computeOneInteractionT2
(
atom1
,
atom2
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
}
else
{
computeOneInteractionT1NoScale
(
atom1
,
atom2
,
delta
,
bn
);
computeOneInteractionT2NoScale
(
atom1
,
atom2
,
delta
,
bn
);
real
dmp
=
atom1
.
damp
*
atom2
.
damp
;
real
a
=
min
(
atom1
.
thole
,
atom2
.
thole
);
real
u
=
fabs
(
dmp
)
>
1.0e-5
f
?
r
/
dmp
:
1e10
f
;
real
au3
=
a
*
u
*
u
*
u
;
real
expau3
=
au3
<
50
?
EXP
(
-
au3
)
:
0
;
real
a2u6
=
au3
*
au3
;
real
a3u9
=
a2u6
*
au3
;
// Thole damping factors for energies
real
thole_c
=
1
-
expau3
;
real
thole_d0
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
thole_d1
=
1
-
expau3
;
real
thole_q0
=
1
-
expau3
*
(
1
+
au3
+
a2u6
);
real
thole_q1
=
1
-
expau3
*
(
1
+
au3
);
// Thole damping factors for derivatives
real
dthole_c
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
dthole_d0
=
1
-
expau3
*
(
1
+
au3
+
1.5
f
*
a2u6
);
real
dthole_d1
=
1
-
expau3
*
(
1
+
au3
);
real
dthole_q0
=
1
-
expau3
*
(
1
+
au3
+
0.25
*
a2u6
+
0.75
*
a3u9
);
real
dthole_q1
=
1
-
expau3
*
(
1
+
au3
+
0.75
*
a2u6
);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real
ePermCoef
,
dPermCoef
,
eUIndCoef
,
dUIndCoef
,
eUInpCoef
,
dUInpCoef
;
// C-C terms (m=0)
ePermCoef
=
rInvVec
[
1
]
*
(
mScale
+
bVec
[
2
]
-
alphaRVec
[
1
]
*
X
);
dPermCoef
=
-
0.5
f
*
(
mScale
+
bVec
[
2
])
*
rInvVec
[
2
];
Vij
[
0
]
=
ePermCoef
*
qiQJ
[
0
];
Vji
[
0
]
=
ePermCoef
*
qiQI
[
0
];
VijR
[
0
]
=
dPermCoef
*
qiQJ
[
0
];
VjiR
[
0
]
=
dPermCoef
*
qiQI
[
0
];
// C-D and C-Uind terms (m=0)
ePermCoef
=
rInvVec
[
2
]
*
(
mScale
+
bVec
[
2
]);
eUIndCoef
=
rInvVec
[
2
]
*
(
dScale
*
thole_c
+
bVec
[
2
]);
eUInpCoef
=
rInvVec
[
2
]
*
(
pScale
*
thole_c
+
bVec
[
2
]);
dPermCoef
=
-
rInvVec
[
3
]
*
(
mScale
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
dUIndCoef
=
-
2
*
rInvVec
[
3
]
*
(
dScale
*
dthole_c
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
dUInpCoef
=
-
2
*
rInvVec
[
3
]
*
(
pScale
*
dthole_c
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
Vij
[
0
]
+=
-
(
ePermCoef
*
qiQJ
[
1
]
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
=
-
(
ePermCoef
*
qiQI
[
0
]);
VijR
[
0
]
+=
-
(
dPermCoef
*
qiQJ
[
1
]
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
=
-
(
dPermCoef
*
qiQI
[
0
]);
Vjip
[
0
]
=
-
(
eUInpCoef
*
qiQI
[
0
]);
Vjid
[
0
]
=
-
(
eUIndCoef
*
qiQI
[
0
]);
// D-C and Uind-C terms (m=0)
Vij
[
1
]
=
ePermCoef
*
qiQJ
[
0
];
Vji
[
0
]
+=
ePermCoef
*
qiQI
[
1
]
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
=
dPermCoef
*
qiQJ
[
0
];
VjiR
[
0
]
+=
dPermCoef
*
qiQI
[
1
]
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
=
eUInpCoef
*
qiQJ
[
0
];
Vijd
[
0
]
=
eUIndCoef
*
qiQJ
[
0
];
// D-D and D-Uind terms (m=0)
const
real
twoThirds
=
(
real
)
2
/
3
;
ePermCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
eUIndCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
dScale
*
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
eUInpCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
pScale
*
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
dPermCoef
=
rInvVec
[
4
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
2
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
rInvVec
[
4
]
*
(
6
*
(
dScale
*
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
rInvVec
[
4
]
*
(
6
*
(
pScale
*
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
Vij
[
1
]
+=
ePermCoef
*
qiQJ
[
1
]
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
;
Vji
[
1
]
+=
ePermCoef
*
qiQI
[
1
]
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
qiQJ
[
1
]
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
;
VjiR
[
1
]
+=
dPermCoef
*
qiQI
[
1
]
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
qiQJ
[
1
];
Vijd
[
0
]
+=
eUIndCoef
*
qiQJ
[
1
];
Vjip
[
0
]
+=
eUInpCoef
*
qiQI
[
1
];
Vjid
[
0
]
+=
eUIndCoef
*
qiQI
[
1
];
// D-D and D-Uind terms (m=1)
ePermCoef
=
rInvVec
[
3
]
*
(
mScale
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
eUIndCoef
=
rInvVec
[
3
]
*
(
dScale
*
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
eUInpCoef
=
rInvVec
[
3
]
*
(
pScale
*
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
dPermCoef
=
-
1.5
f
*
rInvVec
[
4
]
*
(
mScale
+
bVec
[
3
]);
dUIndCoef
=
-
3
*
rInvVec
[
4
]
*
(
dScale
*
dthole_d1
+
bVec
[
3
]);
dUInpCoef
=
-
3
*
rInvVec
[
4
]
*
(
pScale
*
dthole_d1
+
bVec
[
3
]);
Vij
[
2
]
=
ePermCoef
*
qiQJ
[
2
]
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
;
Vji
[
2
]
=
ePermCoef
*
qiQI
[
2
]
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
=
dPermCoef
*
qiQJ
[
2
]
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
;
VjiR
[
2
]
=
dPermCoef
*
qiQI
[
2
]
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
=
ePermCoef
*
qiQJ
[
3
]
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
;
Vji
[
3
]
=
ePermCoef
*
qiQI
[
3
]
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
=
dPermCoef
*
qiQJ
[
3
]
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
;
VjiR
[
3
]
=
dPermCoef
*
qiQI
[
3
]
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
=
eUInpCoef
*
qiQJ
[
2
];
Vijd
[
1
]
=
eUIndCoef
*
qiQJ
[
2
];
Vjip
[
1
]
=
eUInpCoef
*
qiQI
[
2
];
Vjid
[
1
]
=
eUIndCoef
*
qiQI
[
2
];
Vijp
[
2
]
=
eUInpCoef
*
qiQJ
[
3
];
Vijd
[
2
]
=
eUIndCoef
*
qiQJ
[
3
];
Vjip
[
2
]
=
eUInpCoef
*
qiQI
[
3
];
Vjid
[
2
]
=
eUIndCoef
*
qiQI
[
3
];
// C-Q terms (m=0)
ePermCoef
=
(
mScale
+
bVec
[
3
])
*
rInvVec
[
3
];
dPermCoef
=
-
((
real
)
1
/
3
)
*
rInvVec
[
4
]
*
(
4.5
*
(
mScale
+
bVec
[
3
])
+
2
*
alphaRVec
[
5
]
*
X
);
Vij
[
0
]
+=
ePermCoef
*
qiQJ
[
4
];
Vji
[
4
]
=
ePermCoef
*
qiQI
[
0
];
VijR
[
0
]
+=
dPermCoef
*
qiQJ
[
4
];
VjiR
[
4
]
=
dPermCoef
*
qiQI
[
0
];
// Q-C terms (m=0)
Vij
[
4
]
=
ePermCoef
*
qiQJ
[
0
];
Vji
[
0
]
+=
ePermCoef
*
qiQI
[
4
];
VijR
[
4
]
=
dPermCoef
*
qiQJ
[
0
];
VjiR
[
0
]
+=
dPermCoef
*
qiQI
[
4
];
// D-Q and Uind-Q terms (m=0)
const
real
fourThirds
=
(
real
)
4
/
3
;
ePermCoef
=
rInvVec
[
4
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
eUIndCoef
=
rInvVec
[
4
]
*
(
3
*
(
dScale
*
thole_q0
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
eUInpCoef
=
rInvVec
[
4
]
*
(
3
*
(
pScale
*
thole_q0
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
4.5
f
*
(
mScale
+
bVec
[
3
])
+
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
9
*
(
dScale
*
dthole_q0
+
bVec
[
3
])
+
2
*
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
9
*
(
pScale
*
dthole_q0
+
bVec
[
3
])
+
2
*
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
Vij
[
1
]
+=
ePermCoef
*
qiQJ
[
4
];
Vji
[
4
]
+=
ePermCoef
*
qiQI
[
1
]
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
qiQJ
[
4
];
VjiR
[
4
]
+=
dPermCoef
*
qiQI
[
1
]
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
qiQJ
[
4
];
Vijd
[
0
]
+=
eUIndCoef
*
qiQJ
[
4
];
// Q-D and Q-Uind terms (m=0)
Vij
[
4
]
+=
-
(
ePermCoef
*
qiQJ
[
1
]
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
+=
-
(
ePermCoef
*
qiQI
[
4
]);
VijR
[
4
]
+=
-
(
dPermCoef
*
qiQJ
[
1
]
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
+=
-
(
dPermCoef
*
qiQI
[
4
]);
Vjip
[
0
]
+=
-
(
eUInpCoef
*
qiQI
[
4
]);
Vjid
[
0
]
+=
-
(
eUIndCoef
*
qiQI
[
4
]);
// D-Q and Uind-Q terms (m=1)
ePermCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
mScale
+
bVec
[
3
]);
eUIndCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
dScale
*
thole_q1
+
bVec
[
3
]);
eUInpCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
pScale
*
thole_q1
+
bVec
[
3
]);
const
real
fourSqrtOneThird
=
4
/
sqrt
((
real
)
3
);
dPermCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
1.5
f
*
(
mScale
+
bVec
[
3
])
+
0.5
f
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
3
*
(
dScale
*
dthole_q1
+
bVec
[
3
])
+
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
3
*
(
pScale
*
dthole_q1
+
bVec
[
3
])
+
alphaRVec
[
5
]
*
X
);
Vij
[
2
]
+=
ePermCoef
*
qiQJ
[
5
];
Vji
[
5
]
=
ePermCoef
*
qiQI
[
2
]
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
+=
dPermCoef
*
qiQJ
[
5
];
VjiR
[
5
]
=
dPermCoef
*
qiQI
[
2
]
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
+=
ePermCoef
*
qiQJ
[
6
];
Vji
[
6
]
=
ePermCoef
*
qiQI
[
3
]
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
+=
dPermCoef
*
qiQJ
[
6
];
VjiR
[
6
]
=
dPermCoef
*
qiQI
[
3
]
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
+=
eUInpCoef
*
qiQJ
[
5
];
Vijd
[
1
]
+=
eUIndCoef
*
qiQJ
[
5
];
Vijp
[
2
]
+=
eUInpCoef
*
qiQJ
[
6
];
Vijd
[
2
]
+=
eUIndCoef
*
qiQJ
[
6
];
// D-Q and Uind-Q terms (m=1)
Vij
[
5
]
=
-
(
ePermCoef
*
qiQJ
[
2
]
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
);
Vji
[
2
]
+=
-
(
ePermCoef
*
qiQI
[
5
]);
VijR
[
5
]
=
-
(
dPermCoef
*
qiQJ
[
2
]
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
);
VjiR
[
2
]
+=
-
(
dPermCoef
*
qiQI
[
5
]);
Vij
[
6
]
=
-
(
ePermCoef
*
qiQJ
[
3
]
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
);
Vji
[
3
]
+=
-
(
ePermCoef
*
qiQI
[
6
]);
VijR
[
6
]
=
-
(
dPermCoef
*
qiQJ
[
3
]
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
);
VjiR
[
3
]
+=
-
(
dPermCoef
*
qiQI
[
6
]);
Vjip
[
1
]
+=
-
(
eUInpCoef
*
qiQI
[
5
]);
Vjid
[
1
]
+=
-
(
eUIndCoef
*
qiQI
[
5
]);
Vjip
[
2
]
+=
-
(
eUInpCoef
*
qiQI
[
6
]);
Vjid
[
2
]
+=
-
(
eUIndCoef
*
qiQI
[
6
]);
// Q-Q terms (m=0)
ePermCoef
=
rInvVec
[
5
]
*
(
6
*
(
mScale
+
bVec
[
4
])
+
((
real
)
4
/
45
)
*
(
-
3
+
10
*
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
rInvVec
[
6
]
*
(
135
*
(
mScale
+
bVec
[
4
])
+
4
*
(
1
+
2
*
alphaRVec
[
2
])
*
alphaRVec
[
7
]
*
X
)
/
9
;
Vij
[
4
]
+=
ePermCoef
*
qiQJ
[
4
];
Vji
[
4
]
+=
ePermCoef
*
qiQI
[
4
];
VijR
[
4
]
+=
dPermCoef
*
qiQJ
[
4
];
VjiR
[
4
]
+=
dPermCoef
*
qiQI
[
4
];
// Q-Q terms (m=1)
const
real
fourOverFifteen
=
(
real
)
4
/
15
;
ePermCoef
=
-
fourOverFifteen
*
rInvVec
[
5
]
*
(
15
*
(
mScale
+
bVec
[
4
])
+
alphaRVec
[
5
]
*
X
);
dPermCoef
=
rInvVec
[
6
]
*
(
10
*
(
mScale
+
bVec
[
4
])
+
fourThirds
*
alphaRVec
[
7
]
*
X
);
Vij
[
5
]
+=
ePermCoef
*
qiQJ
[
5
];
Vji
[
5
]
+=
ePermCoef
*
qiQI
[
5
];
VijR
[
5
]
+=
dPermCoef
*
qiQJ
[
5
];
VjiR
[
5
]
+=
dPermCoef
*
qiQI
[
5
];
Vij
[
6
]
+=
ePermCoef
*
qiQJ
[
6
];
Vji
[
6
]
+=
ePermCoef
*
qiQI
[
6
];
VijR
[
6
]
+=
dPermCoef
*
qiQJ
[
6
];
VjiR
[
6
]
+=
dPermCoef
*
qiQI
[
6
];
// Q-Q terms (m=2)
ePermCoef
=
rInvVec
[
5
]
*
(
mScale
+
bVec
[
4
]
-
fourOverFifteen
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
2.5
*
(
mScale
+
bVec
[
4
])
*
rInvVec
[
6
];
Vij
[
7
]
=
ePermCoef
*
qiQJ
[
7
];
Vji
[
7
]
=
ePermCoef
*
qiQI
[
7
];
VijR
[
7
]
=
dPermCoef
*
qiQJ
[
7
];
VjiR
[
7
]
=
dPermCoef
*
qiQI
[
7
];
Vij
[
8
]
=
ePermCoef
*
qiQJ
[
8
];
Vji
[
8
]
=
ePermCoef
*
qiQI
[
8
];
VijR
[
8
]
=
dPermCoef
*
qiQJ
[
8
];
VjiR
[
8
]
=
dPermCoef
*
qiQI
[
8
];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy
+=
forceFactor
*
0.5
f
*
(
qiQI
[
0
]
*
Vij
[
0
]
+
qiQJ
[
0
]
*
Vji
[
0
]);
real
fIZ
=
qiQI
[
0
]
*
VijR
[
0
];
real
fJZ
=
qiQJ
[
0
]
*
VjiR
[
0
];
real
EIX
=
0
,
EIY
=
0
,
EIZ
=
0
,
EJX
=
0
,
EJY
=
0
,
EJZ
=
0
;
for
(
int
i
=
1
;
i
<
9
;
++
i
){
energy
+=
forceFactor
*
0.5
f
*
(
qiQI
[
i
]
*
Vij
[
i
]
+
qiQJ
[
i
]
*
Vji
[
i
]);
fIZ
+=
qiQI
[
i
]
*
VijR
[
i
];
fJZ
+=
qiQJ
[
i
]
*
VjiR
[
i
];
EIX
+=
qiQIX
[
i
]
*
Vij
[
i
];
EIY
+=
qiQIY
[
i
]
*
Vij
[
i
];
EIZ
+=
qiQIZ
[
i
]
*
Vij
[
i
];
EJX
+=
qiQJX
[
i
]
*
Vji
[
i
];
EJY
+=
qiQJY
[
i
]
*
Vji
[
i
];
EJZ
+=
qiQJZ
[
i
]
*
Vji
[
i
];
}
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real
iEIX
=
qiUinpI
.
z
*
Vijp
[
0
]
+
qiUindI
.
z
*
Vijd
[
0
]
-
qiUinpI
.
x
*
Vijp
[
2
]
-
qiUindI
.
x
*
Vijd
[
2
];
real
iEJX
=
qiUinpJ
.
z
*
Vjip
[
0
]
+
qiUindJ
.
z
*
Vjid
[
0
]
-
qiUinpJ
.
x
*
Vjip
[
2
]
-
qiUindJ
.
x
*
Vjid
[
2
];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real
iEIY
=
qiUinpI
.
x
*
Vijp
[
1
]
+
qiUindI
.
x
*
Vijd
[
1
]
-
qiUinpI
.
y
*
Vijp
[
0
]
-
qiUindI
.
y
*
Vijd
[
0
];
real
iEJY
=
qiUinpJ
.
x
*
Vjip
[
1
]
+
qiUindJ
.
x
*
Vjid
[
1
]
-
qiUinpJ
.
y
*
Vjip
[
0
]
-
qiUindJ
.
y
*
Vjid
[
0
];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real
eCoef
=
-
fourThirds
*
rInvVec
[
3
]
*
(
3
*
(
uScale
*
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
real
dCoef
=
rInvVec
[
4
]
*
(
6
*
(
uScale
*
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
iEIX
+=
eCoef
*
(
qiUinpI
.
z
*
qiUindJ
.
x
+
qiUindI
.
z
*
qiUinpJ
.
x
);
iEJX
+=
eCoef
*
(
qiUinpJ
.
z
*
qiUindI
.
x
+
qiUindJ
.
z
*
qiUinpI
.
x
);
iEIY
-=
eCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
x
+
qiUindI
.
y
*
qiUinpJ
.
x
);
iEJY
-=
eCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
x
+
qiUindJ
.
y
*
qiUinpI
.
x
);
fIZ
+=
dCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
x
+
qiUindI
.
x
*
qiUinpJ
.
x
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
x
+
qiUindJ
.
x
*
qiUinpI
.
x
);
// Uind-Uind terms (m=1)
eCoef
=
2
*
rInvVec
[
3
]
*
(
uScale
*
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
dCoef
=
-
3
*
rInvVec
[
4
]
*
(
uScale
*
dthole_d1
+
bVec
[
3
]);
iEIX
-=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
z
+
qiUindI
.
x
*
qiUinpJ
.
z
);
iEJX
-=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
z
+
qiUindJ
.
x
*
qiUinpI
.
z
);
iEIY
+=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
y
+
qiUindI
.
x
*
qiUinpJ
.
y
);
iEJY
+=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
y
+
qiUindJ
.
x
*
qiUinpI
.
y
);
fIZ
+=
dCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
y
+
qiUindI
.
y
*
qiUinpJ
.
y
+
qiUinpI
.
z
*
qiUindJ
.
z
+
qiUindI
.
z
*
qiUinpJ
.
z
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
y
+
qiUindJ
.
y
*
qiUinpI
.
y
+
qiUinpJ
.
z
*
qiUindI
.
z
+
qiUindJ
.
z
*
qiUinpI
.
z
);
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real
qiForce
[
3
]
=
{
rInv
*
(
EIY
+
EJY
+
iEIY
+
iEJY
),
-
rInv
*
(
EIX
+
EJX
+
iEIX
+
iEJX
),
-
(
fJZ
+
fIZ
)};
real
qiTorqueI
[
3
]
=
{
-
EIX
,
-
EIY
,
-
EIZ
};
real
qiTorqueJ
[
3
]
=
{
-
EJX
,
-
EJY
,
-
EJZ
};
real3
force
=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiForce
[
2
]);
atom1
.
force
+=
force
;
atom1
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueI
[
2
]);
if
(
forceFactor
==
1
)
{
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta
.
x
=
-
delta
.
x
;
delta
.
y
=
-
delta
.
y
;
delta
.
z
=
-
delta
.
z
;
if
(
hasExclusions
)
{
computeOneInteractionT1
(
atom2
,
atom1
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
computeOneInteractionT2
(
atom2
,
atom1
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
}
else
{
computeOneInteractionT1NoScale
(
atom2
,
atom1
,
delta
,
bn
);
computeOneInteractionT2NoScale
(
atom2
,
atom1
,
delta
,
bn
);
}
atom2
.
force
-=
force
;
atom2
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueJ
[
2
]);
}
}
/**
* Compute the self energy and self torque.
*/
__device__
void
computeSelfEnergyAndTorque
(
AtomData
&
atom1
,
real
&
energy
)
{
__device__
void
computeSelfEnergyAndTorque
(
AtomData
&
atom1
,
int
atomIndex
,
real
&
energy
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
)
{
real
term
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fterm
=
-
EWALD_ALPHA
/
SQRT_PI
;
real
cii
=
atom1
.
q
*
atom1
.
q
;
real
dii
=
dot
(
atom1
.
dipole
,
atom1
.
dipole
);
real3
dipole
=
make_real3
(
labFrameDipole
[
atomIndex
*
3
],
labFrameDipole
[
atomIndex
*
3
+
1
],
labFrameDipole
[
atomIndex
*
3
+
2
]);
real
dii
=
dot
(
dipole
,
dipole
);
#ifdef INCLUDE_QUADRUPOLES
real
qii
=
2
*
(
atom1
.
quadrupoleXX
*
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
*
atom1
.
quadrupoleYY
+
atom1
.
quadrupoleXX
*
atom1
.
quadrupoleYY
+
atom1
.
quadrupoleXY
*
atom1
.
quadrupoleXY
+
atom1
.
quadrupoleXZ
*
atom1
.
quadrupoleXZ
+
atom1
.
quadrupoleYZ
*
atom1
.
quadrupoleYZ
);
real
quadrupoleXX
=
labFrameQuadrupole
[
atomIndex
*
5
];
real
quadrupoleXY
=
labFrameQuadrupole
[
atomIndex
*
5
+
1
];
real
quadrupoleXZ
=
labFrameQuadrupole
[
atomIndex
*
5
+
2
];
real
quadrupoleYY
=
labFrameQuadrupole
[
atomIndex
*
5
+
3
];
real
quadrupoleYZ
=
labFrameQuadrupole
[
atomIndex
*
5
+
4
];
real
qii
=
2
*
(
quadrupoleXX
*
quadrupoleXX
+
quadrupoleYY
*
quadrupoleYY
+
quadrupoleXX
*
quadrupoleYY
+
quadrupoleXY
*
quadrupoleXY
+
quadrupoleXZ
*
quadrupoleXZ
+
quadrupoleYZ
*
quadrupoleYZ
);
#else
real
qii
=
0
;
#endif
real
uii
=
dot
(
atom1
.
dipole
,
atom1
.
inducedDipole
);
real
uii
=
dot
(
dipole
,
atom1
.
inducedDipole
);
real
selfEnergy
=
(
cii
+
term
*
(
dii
/
3
+
2
*
term
*
qii
/
5
));
selfEnergy
+=
term
*
uii
/
3
;
selfEnergy
*=
fterm
;
...
...
@@ -189,7 +472,7 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
// self-torque for PME
real3
ui
=
atom1
.
inducedDipole
+
atom1
.
inducedDipolePolar
;
atom1
.
torque
+=
((
2
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
)
*
cross
(
atom1
.
dipole
,
ui
);
atom1
.
torque
+=
((
2
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
)
*
cross
(
dipole
,
ui
);
}
/**
...
...
@@ -204,8 +487,9 @@ extern "C" __global__ void computeElectrostatics(
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#endif
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
sphericalDipole
,
const
real
*
__restrict__
sphericalQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
...
...
@@ -223,7 +507,7 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
y
=
tileIndices
.
y
;
AtomData
data
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
uint2
covalent
=
covalentFlags
[
pos
*
TILE_SIZE
+
tgx
];
...
...
@@ -233,13 +517,13 @@ extern "C" __global__ void computeElectrostatics(
localData
[
threadIdx
.
x
].
pos
=
data
.
pos
;
localData
[
threadIdx
.
x
].
q
=
data
.
q
;
localData
[
threadIdx
.
x
].
d
ipole
=
data
.
d
ipole
;
localData
[
threadIdx
.
x
].
sphericalD
ipole
=
data
.
sphericalD
ipole
;
#ifdef INCLUDE_QUADRUPOLES
localData
[
threadIdx
.
x
].
q
uadrupole
XX
=
data
.
q
uadrupole
XX
;
localData
[
threadIdx
.
x
].
q
uadrupole
XY
=
data
.
q
uadrupole
XY
;
localData
[
threadIdx
.
x
].
q
uadrupole
XZ
=
data
.
q
uadrupole
XZ
;
localData
[
threadIdx
.
x
].
q
uadrupole
YY
=
data
.
q
uadrupole
YY
;
localData
[
threadIdx
.
x
].
q
uadrupole
YZ
=
data
.
q
uadrupole
YZ
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
0
]
=
data
.
sphericalQ
uadrupole
[
0
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
1
]
=
data
.
sphericalQ
uadrupole
[
1
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
2
]
=
data
.
sphericalQ
uadrupole
[
2
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
3
]
=
data
.
sphericalQ
uadrupole
[
3
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
4
]
=
data
.
sphericalQ
uadrupole
[
4
]
;
#endif
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
...
...
@@ -258,7 +542,7 @@ extern "C" __global__ void computeElectrostatics(
}
}
if
(
atom1
<
NUM_ATOMS
)
computeSelfEnergyAndTorque
(
data
,
energy
);
computeSelfEnergyAndTorque
(
data
,
atom1
,
energy
,
labFrameDipole
,
labFrameQuadrupole
);
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
...
...
@@ -272,7 +556,7 @@ extern "C" __global__ void computeElectrostatics(
// This is an off-diagonal tile.
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
unsigned
int
tj
=
tgx
;
...
...
@@ -366,7 +650,7 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData
data
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
#ifdef USE_CUTOFF
...
...
@@ -375,7 +659,7 @@ extern "C" __global__ void computeElectrostatics(
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/sphericalMultipoles.cu
0 → 100644
View file @
72ab8864
__device__
void
buildQIRotationMatrix
(
real3
deltaR
,
real
rInv
,
real
(
&
rotationMatrix
)[
3
][
3
])
{
real3
vectorZ
=
deltaR
*
rInv
;
real3
vectorX
=
vectorZ
;
if
(
deltaR
.
y
!=
0
||
deltaR
.
z
!=
0
)
vectorX
.
x
+=
1
;
else
vectorX
.
y
+=
1
;
vectorX
-=
vectorZ
*
dot
(
vectorX
,
vectorZ
);
vectorX
=
normalize
(
vectorX
);
real3
vectorY
=
cross
(
vectorZ
,
vectorX
);
// Reorder the Cartesian {x,y,z} dipole rotation matrix, to account
// for spherical harmonic ordering {z,x,y}.
rotationMatrix
[
0
][
0
]
=
vectorZ
.
z
;
rotationMatrix
[
0
][
1
]
=
vectorZ
.
x
;
rotationMatrix
[
0
][
2
]
=
vectorZ
.
y
;
rotationMatrix
[
1
][
0
]
=
vectorX
.
z
;
rotationMatrix
[
1
][
1
]
=
vectorX
.
x
;
rotationMatrix
[
1
][
2
]
=
vectorX
.
y
;
rotationMatrix
[
2
][
0
]
=
vectorY
.
z
;
rotationMatrix
[
2
][
1
]
=
vectorY
.
x
;
rotationMatrix
[
2
][
2
]
=
vectorY
.
y
;
}
__device__
real3
rotateDipole
(
real3
&
dipole
,
const
real
(
&
rotationMatrix
)[
3
][
3
])
{
return
make_real3
(
rotationMatrix
[
0
][
0
]
*
dipole
.
x
+
rotationMatrix
[
0
][
1
]
*
dipole
.
y
+
rotationMatrix
[
0
][
2
]
*
dipole
.
z
,
rotationMatrix
[
1
][
0
]
*
dipole
.
x
+
rotationMatrix
[
1
][
1
]
*
dipole
.
y
+
rotationMatrix
[
1
][
2
]
*
dipole
.
z
,
rotationMatrix
[
2
][
0
]
*
dipole
.
x
+
rotationMatrix
[
2
][
1
]
*
dipole
.
y
+
rotationMatrix
[
2
][
2
]
*
dipole
.
z
);
}
__device__
void
rotateQuadupoles
(
const
real
(
&
rotationMatrix
)[
3
][
3
],
const
real
*
quad1
,
const
real
*
quad2
,
real
*
rotated1
,
real
*
rotated2
)
{
real
sqrtThree
=
SQRT
((
real
)
3
);
real
element
;
element
=
0.5
f
*
(
3.0
f
*
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
0
][
0
]
-
1.0
f
);
rotated1
[
0
]
+=
quad1
[
0
]
*
element
;
rotated2
[
0
]
+=
quad2
[
0
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
0
][
1
];
rotated1
[
0
]
+=
quad1
[
1
]
*
element
;
rotated2
[
0
]
+=
quad2
[
1
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
0
][
2
];
rotated1
[
0
]
+=
quad1
[
2
]
*
element
;
rotated2
[
0
]
+=
quad2
[
2
]
*
element
;
element
=
0.5
f
*
sqrtThree
*
(
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
0
][
1
]
-
rotationMatrix
[
0
][
2
]
*
rotationMatrix
[
0
][
2
]);
rotated1
[
0
]
+=
quad1
[
3
]
*
element
;
rotated2
[
0
]
+=
quad2
[
3
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
0
][
2
];
rotated1
[
0
]
+=
quad1
[
4
]
*
element
;
rotated2
[
0
]
+=
quad2
[
4
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
1
][
0
];
rotated1
[
1
]
+=
quad1
[
0
]
*
element
;
rotated2
[
1
]
+=
quad2
[
0
]
*
element
;
element
=
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
0
][
1
]
+
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
1
][
1
];
rotated1
[
1
]
+=
quad1
[
1
]
*
element
;
rotated2
[
1
]
+=
quad2
[
1
]
*
element
;
element
=
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
0
][
2
]
+
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
1
][
2
];
rotated1
[
1
]
+=
quad1
[
2
]
*
element
;
rotated2
[
1
]
+=
quad2
[
2
]
*
element
;
element
=
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
1
][
1
]
-
rotationMatrix
[
0
][
2
]
*
rotationMatrix
[
1
][
2
];
rotated1
[
1
]
+=
quad1
[
3
]
*
element
;
rotated2
[
1
]
+=
quad2
[
3
]
*
element
;
element
=
rotationMatrix
[
1
][
1
]
*
rotationMatrix
[
0
][
2
]
+
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
1
][
2
];
rotated1
[
1
]
+=
quad1
[
4
]
*
element
;
rotated2
[
1
]
+=
quad2
[
4
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
2
][
0
];
rotated1
[
2
]
+=
quad1
[
0
]
*
element
;
rotated2
[
2
]
+=
quad2
[
0
]
*
element
;
element
=
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
0
][
1
]
+
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
2
][
1
];
rotated1
[
2
]
+=
quad1
[
1
]
*
element
;
rotated2
[
2
]
+=
quad2
[
1
]
*
element
;
element
=
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
0
][
2
]
+
rotationMatrix
[
0
][
0
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
2
]
+=
quad1
[
2
]
*
element
;
rotated2
[
2
]
+=
quad2
[
2
]
*
element
;
element
=
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
2
][
1
]
-
rotationMatrix
[
0
][
2
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
2
]
+=
quad1
[
3
]
*
element
;
rotated2
[
2
]
+=
quad2
[
3
]
*
element
;
element
=
rotationMatrix
[
2
][
1
]
*
rotationMatrix
[
0
][
2
]
+
rotationMatrix
[
0
][
1
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
2
]
+=
quad1
[
4
]
*
element
;
rotated2
[
2
]
+=
quad2
[
4
]
*
element
;
element
=
0.5
f
*
sqrtThree
*
(
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
1
][
0
]
-
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
2
][
0
]);
rotated1
[
3
]
+=
quad1
[
0
]
*
element
;
rotated2
[
3
]
+=
quad2
[
0
]
*
element
;
element
=
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
1
][
1
]
-
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
2
][
1
];
rotated1
[
3
]
+=
quad1
[
1
]
*
element
;
rotated2
[
3
]
+=
quad2
[
1
]
*
element
;
element
=
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
1
][
2
]
-
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
3
]
+=
quad1
[
2
]
*
element
;
rotated2
[
3
]
+=
quad2
[
2
]
*
element
;
element
=
0.5
f
*
(
rotationMatrix
[
1
][
1
]
*
rotationMatrix
[
1
][
1
]
-
rotationMatrix
[
2
][
1
]
*
rotationMatrix
[
2
][
1
]
-
rotationMatrix
[
1
][
2
]
*
rotationMatrix
[
1
][
2
]
+
rotationMatrix
[
2
][
2
]
*
rotationMatrix
[
2
][
2
]);
rotated1
[
3
]
+=
quad1
[
3
]
*
element
;
rotated2
[
3
]
+=
quad2
[
3
]
*
element
;
element
=
rotationMatrix
[
1
][
1
]
*
rotationMatrix
[
1
][
2
]
-
rotationMatrix
[
2
][
1
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
3
]
+=
quad1
[
4
]
*
element
;
rotated2
[
3
]
+=
quad2
[
4
]
*
element
;
element
=
sqrtThree
*
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
2
][
0
];
rotated1
[
4
]
+=
quad1
[
0
]
*
element
;
rotated2
[
4
]
+=
quad2
[
0
]
*
element
;
element
=
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
1
][
1
]
+
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
2
][
1
];
rotated1
[
4
]
+=
quad1
[
1
]
*
element
;
rotated2
[
4
]
+=
quad2
[
1
]
*
element
;
element
=
rotationMatrix
[
2
][
0
]
*
rotationMatrix
[
1
][
2
]
+
rotationMatrix
[
1
][
0
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
4
]
+=
quad1
[
2
]
*
element
;
rotated2
[
4
]
+=
quad2
[
2
]
*
element
;
element
=
rotationMatrix
[
1
][
1
]
*
rotationMatrix
[
2
][
1
]
-
rotationMatrix
[
1
][
2
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
4
]
+=
quad1
[
3
]
*
element
;
rotated2
[
4
]
+=
quad2
[
3
]
*
element
;
element
=
rotationMatrix
[
2
][
1
]
*
rotationMatrix
[
1
][
2
]
+
rotationMatrix
[
1
][
1
]
*
rotationMatrix
[
2
][
2
];
rotated1
[
4
]
+=
quad1
[
4
]
*
element
;
rotated2
[
4
]
+=
quad2
[
4
]
*
element
;
}
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
View file @
72ab8864
...
...
@@ -6676,6 +6676,6 @@ RealOpenMM AmoebaReferencePmeMultipoleForce::calculateElectrostatic(const vector
energy += computeReciprocalSpaceInducedDipoleForceAndEnergy(getPolarizationType(), particleData, forces, torques);
energy += computeReciprocalSpaceFixedMultipoleForceAndEnergy(particleData, forces, torques);
energy += calculatePmeSelfEnergy(particleData);
return energy;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment