Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b7088b74
Commit
b7088b74
authored
Aug 10, 2015
by
peastman
Committed by
Robert McGibbon
Aug 27, 2015
Browse files
Python 2/3 compatibility in single code base, plus python 3 testing on travis.
parent
4c00b312
Changes
123
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1062 additions
and
2931 deletions
+1062
-2931
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+7
-5
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+21
-17
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+8
-8
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+8
-8
platforms/opencl/staticTarget/CMakeLists.txt
platforms/opencl/staticTarget/CMakeLists.txt
+1
-1
platforms/opencl/tests/CMakeLists.txt
platforms/opencl/tests/CMakeLists.txt
+1
-1
platforms/reference/src/SimTKReference/ReferenceConstraints.cpp
...rms/reference/src/SimTKReference/ReferenceConstraints.cpp
+26
-26
platforms/reference/tests/CMakeLists.txt
platforms/reference/tests/CMakeLists.txt
+1
-1
plugins/amoeba/CMakeLists.txt
plugins/amoeba/CMakeLists.txt
+2
-2
plugins/amoeba/platforms/cuda/CMakeLists.txt
plugins/amoeba/platforms/cuda/CMakeLists.txt
+1
-1
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+20
-32
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+2
-0
plugins/amoeba/platforms/cuda/src/kernels/electrostaticPairForce.cu
...oeba/platforms/cuda/src/kernels/electrostaticPairForce.cu
+0
-538
plugins/amoeba/platforms/cuda/src/kernels/electrostaticPairForceNoQuadrupoles.cu
...s/cuda/src/kernels/electrostaticPairForceNoQuadrupoles.cu
+0
-342
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
...eba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+381
-133
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+136
-125
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+85
-10
plugins/amoeba/platforms/cuda/src/kernels/pmeElectrostaticPairForce.cu
...a/platforms/cuda/src/kernels/pmeElectrostaticPairForce.cu
+0
-947
plugins/amoeba/platforms/cuda/src/kernels/pmeElectrostaticPairForceNoQuadrupoles.cu
...uda/src/kernels/pmeElectrostaticPairForceNoQuadrupoles.cu
+0
-615
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
.../platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+362
-119
No files found.
platforms/opencl/src/OpenCLIntegrationUtilities.cpp
View file @
b7088b74
...
...
@@ -220,15 +220,17 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
params
.
push_back
(
mm_float2
(
dist13
,
dist12
));
}
else
throw
OpenMMException
(
"Two of the three distances constrained with SETTLE must be the same."
);
continue
;
// We can't handle this with SETTLE
isShakeAtom
[
atom1
]
=
true
;
isShakeAtom
[
atom2
]
=
true
;
isShakeAtom
[
atom3
]
=
true
;
}
settleAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
->
upload
(
atoms
);
settleParams
->
upload
(
params
);
if
(
atoms
.
size
()
>
0
)
{
settleAtoms
=
OpenCLArray
::
create
<
mm_int4
>
(
context
,
atoms
.
size
(),
"settleAtoms"
);
settleParams
=
OpenCLArray
::
create
<
mm_float2
>
(
context
,
params
.
size
(),
"settleParams"
);
settleAtoms
->
upload
(
atoms
);
settleParams
->
upload
(
params
);
}
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
b7088b74
...
...
@@ -186,7 +186,7 @@ static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions
.
resize
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
atomExclusions
[
i
].
push_back
(
i
);
...
...
@@ -199,7 +199,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
// Build a list of tiles that contain exclusions.
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
...
@@ -341,14 +341,19 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
}
...
...
@@ -360,18 +365,16 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups) {
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
kernels
.
forceKernel
,
9
);
context
.
executeKernel
(
kernels
.
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
}
void
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
...
...
@@ -395,6 +398,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
return
true
;
}
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
@@ -410,7 +414,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
if
(
useCutoff
)
{
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
...
...
@@ -491,7 +495,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
b7088b74
...
...
@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
...
...
@@ -38,7 +38,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
...
@@ -100,7 +100,7 @@ __kernel void computeNonbonded(
}
else {
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
...
...
@@ -126,7 +126,7 @@ __kernel void computeNonbonded(
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
PRUNE_BY_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
...
...
@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
...
...
@@ -245,7 +245,7 @@ __kernel void computeNonbonded(
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
...
...
@@ -300,7 +300,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
@@ -352,7 +352,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
b7088b74
...
...
@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
...
...
@@ -31,7 +31,7 @@ __kernel void computeNonbonded(
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
...
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
@@ -228,9 +228,9 @@ __kernel void computeNonbonded(
while (pos < end) {
const bool hasExclusions = false;
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
...
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
real invR = RSQRT(r2);
real r = r2*invR;
unsigned int atom2 = j;
...
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
platforms/opencl/staticTarget/CMakeLists.txt
View file @
b7088b74
...
...
@@ -15,6 +15,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB_STATIC
}
)
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
platforms/opencl/tests/CMakeLists.txt
View file @
b7088b74
...
...
@@ -25,7 +25,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
ADD_EXECUTABLE
(
${
TEST_ROOT
}
${
TEST_PROG
}
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
SHARED_TARGET
}
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
Single
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
IF
(
OPENMM_BUILD_OPENCL_DOUBLE_PRECISION_TESTS
)
...
...
platforms/reference/src/SimTKReference/ReferenceConstraints.cpp
View file @
b7088b74
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -98,14 +98,13 @@ ReferenceConstraints::ReferenceConstraints(const System& system) : ccma(NULL), s
// Record the SETTLE clusters.
vector
<
bool
>
isSettleAtom
(
numParticles
,
false
);
int
numSETTLE
=
settleClusters
.
size
();
if
(
numSETTLE
>
0
)
{
vector
<
int
>
atom1
(
numSETTLE
);
vector
<
int
>
atom2
(
numSETTLE
);
vector
<
int
>
atom3
(
numSETTLE
);
vector
<
RealOpenMM
>
distance1
(
numSETTLE
);
vector
<
RealOpenMM
>
distance2
(
numSETTLE
);
for
(
int
i
=
0
;
i
<
numSETTLE
;
i
++
)
{
if
(
settleClusters
.
size
()
>
0
)
{
vector
<
int
>
atom1
;
vector
<
int
>
atom2
;
vector
<
int
>
atom3
;
vector
<
RealOpenMM
>
distance1
;
vector
<
RealOpenMM
>
distance2
;
for
(
int
i
=
0
;
i
<
settleClusters
.
size
();
i
++
)
{
int
p1
=
settleClusters
[
i
];
int
p2
=
settleConstraints
[
p1
].
begin
()
->
first
;
int
p3
=
(
++
settleConstraints
[
p1
].
begin
())
->
first
;
...
...
@@ -114,35 +113,36 @@ ReferenceConstraints::ReferenceConstraints(const System& system) : ccma(NULL), s
float
dist23
=
settleConstraints
[
p2
].
find
(
p3
)
->
second
;
if
(
dist12
==
dist13
)
{
// p1 is the central atom
atom1
[
i
]
=
p1
;
atom2
[
i
]
=
p2
;
atom3
[
i
]
=
p3
;
distance1
[
i
]
=
dist12
;
distance2
[
i
]
=
dist23
;
atom1
.
push_back
(
p1
)
;
atom2
.
push_back
(
p2
)
;
atom3
.
push_back
(
p3
)
;
distance1
.
push_back
(
dist12
)
;
distance2
.
push_back
(
dist23
)
;
}
else
if
(
dist12
==
dist23
)
{
// p2 is the central atom
atom1
[
i
]
=
p2
;
atom2
[
i
]
=
p1
;
atom3
[
i
]
=
p3
;
distance1
[
i
]
=
dist12
;
distance2
[
i
]
=
dist13
;
atom1
.
push_back
(
p2
)
;
atom2
.
push_back
(
p1
)
;
atom3
.
push_back
(
p3
)
;
distance1
.
push_back
(
dist12
)
;
distance2
.
push_back
(
dist13
)
;
}
else
if
(
dist13
==
dist23
)
{
// p3 is the central atom
atom1
[
i
]
=
p3
;
atom2
[
i
]
=
p1
;
atom3
[
i
]
=
p2
;
distance1
[
i
]
=
dist13
;
distance2
[
i
]
=
dist12
;
atom1
.
push_back
(
p3
)
;
atom2
.
push_back
(
p1
)
;
atom3
.
push_back
(
p2
)
;
distance1
.
push_back
(
dist13
)
;
distance2
.
push_back
(
dist12
)
;
}
else
throw
OpenMMException
(
"Two of the three distances constrained with SETTLE must be the same."
);
continue
;
// We can't handle this with SETTLE
isSettleAtom
[
p1
]
=
true
;
isSettleAtom
[
p2
]
=
true
;
isSettleAtom
[
p3
]
=
true
;
}
settle
=
new
ReferenceSETTLEAlgorithm
(
atom1
,
atom2
,
atom3
,
distance1
,
distance2
,
masses
);
if
(
atom1
.
size
()
>
0
)
settle
=
new
ReferenceSETTLEAlgorithm
(
atom1
,
atom2
,
atom3
,
distance1
,
distance2
,
masses
);
}
// All other constraints are handled with CCMA.
...
...
platforms/reference/tests/CMakeLists.txt
View file @
b7088b74
...
...
@@ -15,7 +15,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
ELSE
(
OPENMM_BUILD_SHARED_LIB
)
TARGET_LINK_LIBRARIES
(
${
TEST_ROOT
}
${
STATIC_TARGET
}
)
ENDIF
(
OPENMM_BUILD_SHARED_LIB
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ADD_TEST
(
${
TEST_ROOT
}
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
)
ENDFOREACH
(
TEST_PROG
${
TEST_PROGS
}
)
...
...
plugins/amoeba/CMakeLists.txt
View file @
b7088b74
...
...
@@ -86,14 +86,14 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/src
)
ADD_LIBRARY
(
${
SHARED_AMOEBA_TARGET
}
SHARED
${
SOURCE_AMOEBA_FILES
}
${
SOURCE_AMOEBA_INCLUDE_FILES
}
${
API_AMOEBA_ABS_INCLUDE_FILES
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_AMOEBA_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_AMOEBA_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_AMOEBA_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_AMOEBA_BUILDING_SHARED_LIBRARY -DLEPTON_BUILDING_SHARED_LIBRARY"
)
FILE
(
GLOB serialization_files
${
CMAKE_CUURENT_SOURCE_DIR
}
/serialization/src/*.cpp
)
SET_SOURCE_FILES_PROPERTIES
(
${
serialization_files
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_AMOEBA_BUILDING_SHARED_LIBRARY -DTIXML_USE_STL"
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
ADD_LIBRARY
(
${
STATIC_AMOEBA_TARGET
}
STATIC
${
SOURCE_AMOEBA_FILES
}
${
SOURCE_AMOEBA_INCLUDE_FILES
}
${
API_AMOEBA_ABS_INCLUDE_FILES
}
)
SET_TARGET_PROPERTIES
(
${
STATIC_AMOEBA_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_AMOEBA_BUILDING_STATIC_LIBRARY -DOPENMM_USE_STATIC_LIBRARIES -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_USE_STATIC_LIBRARIES -DLEPTON_BUILDING_STATIC_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
STATIC_AMOEBA_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_AMOEBA_BUILDING_STATIC_LIBRARY -DOPENMM_USE_STATIC_LIBRARIES -DOPENMM_BUILDING_STATIC_LIBRARY -DLEPTON_USE_STATIC_LIBRARIES -DLEPTON_BUILDING_STATIC_LIBRARY"
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
IF
(
OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS
)
...
...
plugins/amoeba/platforms/cuda/CMakeLists.txt
View file @
b7088b74
...
...
@@ -94,7 +94,7 @@ SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
)
ENDIF
(
APPLE
)
INSTALL
(
TARGETS
${
SHARED_TARGET
}
DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/lib/plugins
)
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
b7088b74
...
...
@@ -806,8 +806,8 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
frac
Dipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
spherical
Dipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
...
...
@@ -826,6 +826,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
labFrameDipoles
;
if
(
labFrameQuadrupoles
!=
NULL
)
delete
labFrameQuadrupoles
;
if
(
sphericalDipoles
!=
NULL
)
delete
sphericalDipoles
;
if
(
sphericalQuadrupoles
!=
NULL
)
delete
sphericalQuadrupoles
;
if
(
fracDipoles
!=
NULL
)
delete
fracDipoles
;
if
(
fracQuadrupoles
!=
NULL
)
...
...
@@ -985,6 +989,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
sphericalDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"sphericalDipoles"
);
sphericalQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"sphericalQuadrupoles"
);
fracDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"fracDipoles"
);
fracQuadrupoles
=
new
CudaArray
(
cu
,
6
*
paddedNumAtoms
,
elementSize
,
"fracQuadrupoles"
);
field
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"field"
);
...
...
@@ -1144,6 +1150,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
if
(
maxInducedIterations
>
0
)
{
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
defines
[
"USE_MUTUAL_POLARIZATION"
]
=
"1"
;
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
...
...
@@ -1151,33 +1158,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
}
stringstream
electrostaticsSource
;
if
(
usePME
)
{
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
sphericalMultipoles
;
if
(
usePME
)
electrostaticsSource
<<
CudaAmoebaKernelSources
::
pmeMultipoleElectrostatics
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
pmeElectrostaticPairForce
:
CudaAmoebaKernelSources
::
pmeElectrostaticPairForceNoQuadrupoles
);
electrostaticsSource
<<
"#define APPLY_SCALE
\n
"
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
pmeElectrostaticPairForce
:
CudaAmoebaKernelSources
::
pmeElectrostaticPairForceNoQuadrupoles
);
electrostaticsThreadMemory
=
24
*
elementSize
+
3
*
sizeof
(
float
)
+
3
*
sizeof
(
int
)
/
(
double
)
cu
.
TileSize
;
if
(
!
useShuffle
)
electrostaticsThreadMemory
+=
3
*
elementSize
;
}
else
{
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
else
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
electrostaticsSource
<<
"#define F1
\n
"
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
electrostaticPairForce
:
CudaAmoebaKernelSources
::
electrostaticPairForceNoQuadrupoles
);
electrostaticsSource
<<
"#undef F1
\n
"
;
electrostaticsSource
<<
"#define T1
\n
"
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
electrostaticPairForce
:
CudaAmoebaKernelSources
::
electrostaticPairForceNoQuadrupoles
);
electrostaticsSource
<<
"#undef T1
\n
"
;
electrostaticsSource
<<
"#define T3
\n
"
;
electrostaticsSource
<<
(
hasQuadrupoles
?
CudaAmoebaKernelSources
::
electrostaticPairForce
:
CudaAmoebaKernelSources
::
electrostaticPairForceNoQuadrupoles
);
electrostaticsThreadMemory
=
21
*
elementSize
+
2
*
sizeof
(
float
)
+
3
*
sizeof
(
int
)
/
(
double
)
cu
.
TileSize
;
if
(
!
useShuffle
)
electrostaticsThreadMemory
+=
3
*
elementSize
;
if
(
gk
!=
NULL
)
electrostaticsThreadMemory
+=
4
*
elementSize
;
}
electrostaticsThreadMemory
=
24
*
elementSize
+
3
*
sizeof
(
float
)
+
3
*
sizeof
(
int
)
/
(
double
)
cu
.
TileSize
;
electrostaticsThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
electrostaticsThreadMemory
));
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
electrostaticsThreads
);
module
=
cu
.
createModule
(
electrostaticsSource
.
str
(),
defines
);
...
...
@@ -1433,7 +1420,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
computeMomentsArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
(),
&
molecularDipoles
->
getDevicePointer
(),
&
molecularQuadrupoles
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
()};
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
sphericalDipoles
->
getDevicePointer
(),
&
sphericalQuadrupoles
->
getDevicePointer
()};
cu
.
executeKernel
(
computeMomentsKernel
,
computeMomentsArgs
,
cu
.
getNumAtoms
());
int
startTileIndex
=
nb
.
getStartTileIndex
();
int
numTileIndices
=
nb
.
getNumTiles
();
...
...
@@ -1497,8 +1485,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
electrostaticsArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
labFrame
Dipoles
->
getDevicePointer
(),
&
labFrame
Quadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
spherical
Dipoles
->
getDevicePointer
(),
&
spherical
Quadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
if
(
gkKernel
!=
NULL
)
gkKernel
->
finishComputation
(
*
torque
,
*
labFrameDipoles
,
*
labFrameQuadrupoles
,
*
inducedDipole
,
*
inducedDipolePolar
,
*
dampingAndThole
,
*
covalentFlags
,
*
polarizationGroupFlags
);
...
...
@@ -1652,8 +1640,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
labFrame
Dipoles
->
getDevicePointer
(),
&
labFrame
Quadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
spherical
Dipoles
->
getDevicePointer
(),
&
spherical
Quadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
void
*
pmeTransformInducedPotentialArgs
[]
=
{
&
pmePhidp
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformInducedPotentialArgs
,
cu
.
getNumAtoms
());
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
b7088b74
...
...
@@ -392,6 +392,8 @@ private:
CudaArray
*
molecularQuadrupoles
;
CudaArray
*
labFrameDipoles
;
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
sphericalDipoles
;
CudaArray
*
sphericalQuadrupoles
;
CudaArray
*
fracDipoles
;
CudaArray
*
fracQuadrupoles
;
CudaArray
*
field
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/electrostaticPairForce.cu
deleted
100644 → 0
View file @
4c00b312
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real
&
energy
,
real3
&
outputForce
)
{
#elif defined T1
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#else
__device__
void
computeOneInteractionT3
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#endif
#ifdef F1
const
float
uScale
=
1
;
real
ddsc3_0
=
0
;
real
ddsc3_1
=
0
;
real
ddsc3_2
=
0
;
real
ddsc5_0
=
0
;
real
ddsc5_1
=
0
;
real
ddsc5_2
=
0
;
real
ddsc7_0
=
0
;
real
ddsc7_1
=
0
;
real
ddsc7_2
=
0
;
#endif
real
xr
=
atom2
.
posq
.
x
-
atom1
.
posq
.
x
;
real
yr
=
atom2
.
posq
.
y
-
atom1
.
posq
.
y
;
real
zr
=
atom2
.
posq
.
z
-
atom1
.
posq
.
z
;
real
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
real
r
=
SQRT
(
r2
);
real
rr1
=
RECIP
(
r
);
real
rr2
=
rr1
*
rr1
;
real
rr3
=
rr1
*
rr2
;
real
rr5
=
3
*
rr3
*
rr2
;
real
rr7
=
5
*
rr5
*
rr2
;
real
rr9
=
7
*
rr7
*
rr2
;
#ifdef F1
real
rr11
=
9
*
rr9
*
rr2
;
#endif
real
scale3
=
1
;
real
scale5
=
1
;
real
scale7
=
1
;
real
pdamp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
pdamp
!=
0
)
{
real
ratio
=
r
/
pdamp
;
float
pGamma
=
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
real
dampExp
=
EXP
(
-
damp
);
real
damp1
=
damp
+
1
;
real
damp2
=
damp
*
damp
;
scale3
=
1
-
dampExp
;
scale5
=
1
-
damp1
*
dampExp
;
scale7
=
1
-
(
damp1
+
0.6
f
*
damp2
)
*
dampExp
;
#ifdef F1
real
factor
=
3
*
damp
*
dampExp
*
rr2
;
real
factor7
=
-
0.2
f
+
0.6
f
*
damp
;
ddsc3_0
=
factor
*
xr
;
ddsc5_0
=
ddsc3_0
*
damp
;
ddsc7_0
=
ddsc5_0
*
factor7
;
ddsc3_1
=
factor
*
yr
;
ddsc5_1
=
ddsc3_1
*
damp
;
ddsc7_1
=
ddsc5_1
*
factor7
;
ddsc3_2
=
factor
*
zr
;
ddsc5_2
=
ddsc3_2
*
damp
;
ddsc7_2
=
ddsc5_2
*
factor7
;
#endif
}
#if defined F1
real
scale3i
=
rr3
*
scale3
*
uScale
;
real
scale5i
=
rr5
*
scale5
*
uScale
;
#endif
real
dsc3
=
rr3
*
scale3
*
dScale
;
real
psc3
=
rr3
*
scale3
*
pScale
;
real
dsc5
=
rr5
*
scale5
*
dScale
;
real
psc5
=
rr5
*
scale5
*
pScale
;
real
dsc7
=
rr7
*
scale7
*
dScale
;
real
psc7
=
rr7
*
scale7
*
pScale
;
real
atom2quadrupoleZZ
=
-
(
atom2
.
quadrupoleXX
+
atom2
.
quadrupoleYY
);
real
qJr_0
=
atom2
.
quadrupoleXX
*
xr
+
atom2
.
quadrupoleXY
*
yr
+
atom2
.
quadrupoleXZ
*
zr
;
real
qJr_1
=
atom2
.
quadrupoleXY
*
xr
+
atom2
.
quadrupoleYY
*
yr
+
atom2
.
quadrupoleYZ
*
zr
;
real
qJr_2
=
atom2
.
quadrupoleXZ
*
xr
+
atom2
.
quadrupoleYZ
*
yr
+
atom2quadrupoleZZ
*
zr
;
real
atom1quadrupoleZZ
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
);
real
qIr_0
=
atom1
.
quadrupoleXX
*
xr
+
atom1
.
quadrupoleXY
*
yr
+
atom1
.
quadrupoleXZ
*
zr
;
real
qIr_1
=
atom1
.
quadrupoleXY
*
xr
+
atom1
.
quadrupoleYY
*
yr
+
atom1
.
quadrupoleYZ
*
zr
;
real
qIr_2
=
atom1
.
quadrupoleXZ
*
xr
+
atom1
.
quadrupoleYZ
*
yr
+
atom1quadrupoleZZ
*
zr
;
#if defined F1
real
sc2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
dipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
dipole
.
z
*
atom2
.
dipole
.
z
;
#endif
#if defined F1 || defined T1
real
sc4
=
atom2
.
dipole
.
x
*
xr
+
atom2
.
dipole
.
y
*
yr
+
atom2
.
dipole
.
z
*
zr
;
real
sc6
=
qJr_0
*
xr
+
qJr_1
*
yr
+
qJr_2
*
zr
;
#endif
#if defined F1 || defined T3
real
sc3
=
atom1
.
dipole
.
x
*
xr
+
atom1
.
dipole
.
y
*
yr
+
atom1
.
dipole
.
z
*
zr
;
real
sc5
=
qIr_0
*
xr
+
qIr_1
*
yr
+
qIr_2
*
zr
;
#endif
#if defined F1
real
sc7
=
qIr_0
*
atom2
.
dipole
.
x
+
qIr_1
*
atom2
.
dipole
.
y
+
qIr_2
*
atom2
.
dipole
.
z
;
real
sc8
=
qJr_0
*
atom1
.
dipole
.
x
+
qJr_1
*
atom1
.
dipole
.
y
+
qJr_2
*
atom1
.
dipole
.
z
;
real
sc9
=
qIr_0
*
qJr_0
+
qIr_1
*
qJr_1
+
qIr_2
*
qJr_2
;
real
sc10
=
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXX
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleYY
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYZ
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYZ
+
atom1quadrupoleZZ
*
atom2quadrupoleZZ
;
real
sci1
=
atom1
.
inducedDipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
dipole
.
z
;
#endif
#if defined F1 || defined T3
real
sci3
=
atom1
.
inducedDipole
.
x
*
xr
+
atom1
.
inducedDipole
.
y
*
yr
+
atom1
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1
real
sci7
=
qIr_0
*
atom2
.
inducedDipole
.
x
+
qIr_1
*
atom2
.
inducedDipole
.
y
+
qIr_2
*
atom2
.
inducedDipole
.
z
;
real
sci8
=
qJr_0
*
atom1
.
inducedDipole
.
x
+
qJr_1
*
atom1
.
inducedDipole
.
y
+
qJr_2
*
atom1
.
inducedDipole
.
z
;
#endif
#if defined F1 || defined T1
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1
real
scip1
=
atom1
.
inducedDipolePolar
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipolePolar
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipolePolar
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipolePolar
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipolePolar
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipolePolar
.
z
*
atom1
.
dipole
.
z
;
real
scip2
=
atom1
.
inducedDipole
.
x
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
inducedDipolePolar
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
inducedDipolePolar
.
z
;
#endif
#if defined F1 || defined T3
real
scip3
=
((
atom1
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom1
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom1
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#if defined F1 || defined T1
real
scip4
=
((
atom2
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom2
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom2
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#ifdef F1
real
scip7
=
((
qIr_0
)
*
(
atom2
.
inducedDipolePolar
.
x
)
+
(
qIr_1
)
*
(
atom2
.
inducedDipolePolar
.
y
)
+
(
qIr_2
)
*
(
atom2
.
inducedDipolePolar
.
z
));
real
scip8
=
((
qJr_0
)
*
(
atom1
.
inducedDipolePolar
.
x
)
+
(
qJr_1
)
*
(
atom1
.
inducedDipolePolar
.
y
)
+
(
qJr_2
)
*
(
atom1
.
inducedDipolePolar
.
z
));
real
gli1
=
atom2
.
posq
.
w
*
sci3
-
atom1
.
posq
.
w
*
sci4
;
real
gli6
=
sci1
;
real
glip1
=
atom2
.
posq
.
w
*
scip3
-
atom1
.
posq
.
w
*
scip4
;
real
glip6
=
scip1
;
real
gli2
=
-
sc3
*
sci4
-
sci3
*
sc4
;
real
gli3
=
sci3
*
sc6
-
sci4
*
sc5
;
real
gli7
=
2
*
(
sci7
-
sci8
);
real
glip2
=
-
sc3
*
scip4
-
scip3
*
sc4
;
real
glip3
=
scip3
*
sc6
-
scip4
*
sc5
;
real
glip7
=
2
*
(
scip7
-
scip8
);
real
factor3
=
rr3
*
((
gli1
+
gli6
)
*
pScale
+
(
glip1
+
glip6
)
*
dScale
);
real
factor5
=
rr5
*
((
gli2
+
gli7
)
*
pScale
+
(
glip2
+
glip7
)
*
dScale
);
real
factor7
=
rr7
*
(
gli3
*
pScale
+
glip3
*
dScale
);
real
ftm2i_0
=
-
0.5
f
*
(
factor3
*
ddsc3_0
+
factor5
*
ddsc5_0
+
factor7
*
ddsc7_0
);
real
ftm2i_1
=
-
0.5
f
*
(
factor3
*
ddsc3_1
+
factor5
*
ddsc5_1
+
factor7
*
ddsc7_1
);
real
ftm2i_2
=
-
0.5
f
*
(
factor3
*
ddsc3_2
+
factor5
*
ddsc5_2
+
factor7
*
ddsc7_2
);
real
gl0
=
atom1
.
posq
.
w
*
atom2
.
posq
.
w
;
real
gl1
=
atom2
.
posq
.
w
*
sc3
-
atom1
.
posq
.
w
*
sc4
;
real
gl2
=
atom1
.
posq
.
w
*
sc6
+
atom2
.
posq
.
w
*
sc5
-
sc3
*
sc4
;
real
gl3
=
sc3
*
sc6
-
sc4
*
sc5
;
real
gl4
=
sc5
*
sc6
;
real
gl6
=
sc2
;
real
gl7
=
2
*
(
sc7
-
sc8
);
real
gl8
=
2
*
sc10
;
real
gl5
=
-
4
*
sc9
;
real
gf1
=
rr3
*
gl0
+
rr5
*
(
gl1
+
gl6
)
+
rr7
*
(
gl2
+
gl7
+
gl8
)
+
rr9
*
(
gl3
+
gl5
)
+
rr11
*
gl4
;
#endif
#if defined F1 || defined T1
real
gf2
=
-
atom2
.
posq
.
w
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
;
real
gf5
=
2
*
(
-
atom2
.
posq
.
w
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
);
#endif
#if defined F1 || defined T3
real
gf3
=
atom1
.
posq
.
w
*
rr3
+
sc3
*
rr5
+
sc5
*
rr7
;
real
gf6
=
2
*
(
-
atom1
.
posq
.
w
*
rr5
-
sc3
*
rr7
-
sc5
*
rr9
);
#endif
#ifdef F1
real
em
=
mScale
*
(
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
+
rr5
*
(
gl2
+
gl7
+
gl8
)
+
rr7
*
(
gl3
+
gl5
)
+
rr9
*
gl4
);
real
ei
=
0.5
f
*
((
gli1
+
gli6
)
*
psc3
+
(
gli2
+
gli7
)
*
psc5
+
gli3
*
psc7
);
energy
=
em
+
ei
;
#endif
#if defined F1 || defined T1
real
qIdJ_0
=
atom1
.
quadrupoleXX
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
dipole
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
dipole
.
z
;
real
qIdJ_1
=
atom1
.
quadrupoleXY
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
dipole
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
dipole
.
z
;
real
qIdJ_2
=
atom1
.
quadrupoleXZ
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
dipole
.
y
+
atom1quadrupoleZZ
*
atom2
.
dipole
.
z
;
real
qIqJr_0
=
atom1
.
quadrupoleXX
*
qJr_0
+
atom1
.
quadrupoleXY
*
qJr_1
+
atom1
.
quadrupoleXZ
*
qJr_2
;
real
qIqJr_1
=
atom1
.
quadrupoleXY
*
qJr_0
+
atom1
.
quadrupoleYY
*
qJr_1
+
atom1
.
quadrupoleYZ
*
qJr_2
;
real
qIqJr_2
=
atom1
.
quadrupoleXZ
*
qJr_0
+
atom1
.
quadrupoleYZ
*
qJr_1
+
atom1quadrupoleZZ
*
qJr_2
;
#endif
#ifdef F1
real
qkqir_0
=
atom2
.
quadrupoleXX
*
qIr_0
+
atom2
.
quadrupoleXY
*
qIr_1
+
atom2
.
quadrupoleXZ
*
qIr_2
;
real
qkqir_1
=
atom2
.
quadrupoleXY
*
qIr_0
+
atom2
.
quadrupoleYY
*
qIr_1
+
atom2
.
quadrupoleYZ
*
qIr_2
;
real
qkqir_2
=
atom2
.
quadrupoleXZ
*
qIr_0
+
atom2
.
quadrupoleYZ
*
qIr_1
+
atom2quadrupoleZZ
*
qIr_2
;
real
qkdi_0
=
atom2
.
quadrupoleXX
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
z
;
real
qkdi_1
=
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
z
;
real
qkdi_2
=
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
dipole
.
z
;
real
ftm2_0
=
mScale
*
(
gf1
*
xr
+
gf2
*
atom1
.
dipole
.
x
+
gf3
*
atom2
.
dipole
.
x
+
2
*
rr5
*
(
qkdi_0
-
qIdJ_0
)
+
gf5
*
qIr_0
+
gf6
*
qJr_0
+
4
*
rr7
*
(
qIqJr_0
+
qkqir_0
));
real
ftm2_1
=
mScale
*
(
gf1
*
yr
+
gf2
*
atom1
.
dipole
.
y
+
gf3
*
atom2
.
dipole
.
y
+
2
*
rr5
*
(
qkdi_1
-
qIdJ_1
)
+
gf5
*
qIr_1
+
gf6
*
qJr_1
+
4
*
rr7
*
(
qIqJr_1
+
qkqir_1
));
real
ftm2_2
=
mScale
*
(
gf1
*
zr
+
gf2
*
atom1
.
dipole
.
z
+
gf3
*
atom2
.
dipole
.
z
+
2
*
rr5
*
(
qkdi_2
-
qIdJ_2
)
+
gf5
*
qIr_2
+
gf6
*
qJr_2
+
4
*
rr7
*
(
qIqJr_2
+
qkqir_2
));
real
gfi1
=
rr2
*
(
1.5
f
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
scale3i
)
+
2.5
f
*
((
gli7
+
gli2
)
*
psc5
+
(
glip7
+
glip2
)
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
scale5i
)
+
3.5
f
*
(
gli3
*
psc7
+
glip3
*
dsc7
));
ftm2i_0
+=
gfi1
*
xr
;
ftm2i_1
+=
gfi1
*
yr
;
ftm2i_2
+=
gfi1
*
zr
;
#endif
#if defined F1 || defined T1
real
gfi5
=
(
sci4
*
psc7
+
scip4
*
dsc7
);
#endif
#if defined F1 || defined T3
real
gfi6
=
-
(
sci3
*
psc7
+
scip3
*
dsc7
);
#endif
#if defined F1 || defined T1
real
qIuJ_0
=
atom1
.
quadrupoleXX
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
inducedDipole
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJ_1
=
atom1
.
quadrupoleXY
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
inducedDipole
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJ_2
=
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipole
.
y
+
atom1quadrupoleZZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJp_0
=
atom1
.
quadrupoleXX
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipolePolar
.
z
;
real
qIuJp_1
=
atom1
.
quadrupoleXY
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipolePolar
.
z
;
real
qIuJp_2
=
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipolePolar
.
y
+
atom1quadrupoleZZ
*
atom2
.
inducedDipolePolar
.
z
;
#endif
#if defined T3
real
qJuIp_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuIp_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuIp_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuI_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
z
;
real
qJuI_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
z
;
real
qJuI_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipole
.
z
;
#endif
#ifdef F1
real
qkui_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
z
;
real
qkui_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
z
;
real
qkui_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipole
.
z
;
real
qkuip_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qkuip_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qkuip_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipolePolar
.
z
;
ftm2i_0
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
x
*
psc3
+
atom1
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
x
*
psc5
+
atom1
.
inducedDipolePolar
.
x
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
x
*
psc7
+
atom1
.
inducedDipolePolar
.
x
*
dsc7
))
+
0.5
f
*
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
x
*
psc3
+
atom2
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
x
*
psc5
+
atom2
.
inducedDipolePolar
.
x
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
x
*
psc7
+
atom2
.
inducedDipolePolar
.
x
*
dsc7
))
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
x
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
x
+
((
qkui_0
-
qIuJ_0
)
*
psc5
+
(
qkuip_0
-
qIuJp_0
)
*
dsc5
)
+
gfi5
*
qIr_0
+
gfi6
*
qJr_0
;
ftm2i_1
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
y
*
psc3
+
atom1
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
y
*
psc5
+
atom1
.
inducedDipolePolar
.
y
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
y
*
psc7
+
atom1
.
inducedDipolePolar
.
y
*
dsc7
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
y
*
psc3
+
atom2
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
y
*
psc5
+
atom2
.
inducedDipolePolar
.
y
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
y
*
psc7
+
atom2
.
inducedDipolePolar
.
y
*
dsc7
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
y
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
y
+
((
qkui_1
-
qIuJ_1
)
*
psc5
+
(
qkuip_1
-
qIuJp_1
)
*
dsc5
)
+
gfi5
*
qIr_1
+
gfi6
*
qJr_1
;
ftm2i_2
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
z
*
psc3
+
atom1
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
z
*
psc5
+
atom1
.
inducedDipolePolar
.
z
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
z
*
psc7
+
atom1
.
inducedDipolePolar
.
z
*
dsc7
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
z
*
psc3
+
atom2
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
z
*
psc5
+
atom2
.
inducedDipolePolar
.
z
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
z
*
psc7
+
atom2
.
inducedDipolePolar
.
z
*
dsc7
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
z
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
z
+
((
qkui_2
-
qIuJ_2
)
*
psc5
+
(
qkuip_2
-
qIuJp_2
)
*
dsc5
)
+
gfi5
*
qIr_2
+
gfi6
*
qJr_2
;
#ifdef DIRECT_POLARIZATION
real
gfd
=
0.5
*
(
3
*
rr2
*
scip2
*
scale3i
-
5
*
rr2
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scale5i
);
real
temp5
=
0.5
*
scale5i
;
real
fdir_0
=
gfd
*
xr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
);
real
fdir_1
=
gfd
*
yr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
);
real
fdir_2
=
gfd
*
zr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
ftm2i_0
-=
fdir_0
;
ftm2i_1
-=
fdir_1
;
ftm2i_2
-=
fdir_2
;
#else
real
scaleF
=
0.5
f
*
uScale
;
real
inducedFactor3
=
scip2
*
rr3
*
scaleF
;
real
inducedFactor5
=
(
sci3
*
scip4
+
scip3
*
sci4
)
*
rr5
*
scaleF
;
real
findmp_0
=
inducedFactor3
*
ddsc3_0
-
inducedFactor5
*
ddsc5_0
;
real
findmp_1
=
inducedFactor3
*
ddsc3_1
-
inducedFactor5
*
ddsc5_1
;
real
findmp_2
=
inducedFactor3
*
ddsc3_2
-
inducedFactor5
*
ddsc5_2
;
ftm2i_0
-=
findmp_0
;
ftm2i_1
-=
findmp_1
;
ftm2i_2
-=
findmp_2
;
#endif
#endif
#if defined T1
real
gti2
=
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
);
real
gti5
=
gfi5
;
#endif
#if defined T3
real
gti3
=
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
);
real
gti6
=
gfi6
;
#endif
#if defined T1 || defined T3
real
dixdk_0
=
atom1
.
dipole
.
y
*
atom2
.
dipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
dipole
.
y
;
real
dixdk_1
=
atom1
.
dipole
.
z
*
atom2
.
dipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
dipole
.
z
;
real
dixdk_2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
dipole
.
x
;
#if defined T1
real
dixuk_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
y
;
real
dixuk_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
z
;
real
dixuk_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
x
;
#endif
#endif
#ifdef T1
real
dixukp_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
y
;
real
dixukp_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
z
;
real
dixukp_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
x
;
#endif
#ifdef T1
real
dixr_0
=
atom1
.
dipole
.
y
*
zr
-
atom1
.
dipole
.
z
*
yr
;
real
dixr_1
=
atom1
.
dipole
.
z
*
xr
-
atom1
.
dipole
.
x
*
zr
;
real
dixr_2
=
atom1
.
dipole
.
x
*
yr
-
atom1
.
dipole
.
y
*
xr
;
#endif
#ifdef T1
real
rxqiukp_0
=
yr
*
qIuJp_2
-
zr
*
qIuJp_1
;
real
rxqiukp_1
=
zr
*
qIuJp_0
-
xr
*
qIuJp_2
;
real
rxqiukp_2
=
xr
*
qIuJp_1
-
yr
*
qIuJp_0
;
real
rxqir_0
=
yr
*
qIr_2
-
zr
*
qIr_1
;
real
rxqir_1
=
zr
*
qIr_0
-
xr
*
qIr_2
;
real
rxqir_2
=
xr
*
qIr_1
-
yr
*
qIr_0
;
real
rxqiuk_0
=
yr
*
qIuJ_2
-
zr
*
qIuJ_1
;
real
rxqiuk_1
=
zr
*
qIuJ_0
-
xr
*
qIuJ_2
;
real
rxqiuk_2
=
xr
*
qIuJ_1
-
yr
*
qIuJ_0
;
real
ukxqir_0
=
atom2
.
inducedDipole
.
y
*
qIr_2
-
atom2
.
inducedDipole
.
z
*
qIr_1
;
real
ukxqir_1
=
atom2
.
inducedDipole
.
z
*
qIr_0
-
atom2
.
inducedDipole
.
x
*
qIr_2
;
real
ukxqir_2
=
atom2
.
inducedDipole
.
x
*
qIr_1
-
atom2
.
inducedDipole
.
y
*
qIr_0
;
real
ukxqirp_0
=
atom2
.
inducedDipolePolar
.
y
*
qIr_2
-
atom2
.
inducedDipolePolar
.
z
*
qIr_1
;
real
ukxqirp_1
=
atom2
.
inducedDipolePolar
.
z
*
qIr_0
-
atom2
.
inducedDipolePolar
.
x
*
qIr_2
;
real
ukxqirp_2
=
atom2
.
inducedDipolePolar
.
x
*
qIr_1
-
atom2
.
inducedDipolePolar
.
y
*
qIr_0
;
real
dixqkr_0
=
atom1
.
dipole
.
y
*
qJr_2
-
atom1
.
dipole
.
z
*
qJr_1
;
real
dixqkr_1
=
atom1
.
dipole
.
z
*
qJr_0
-
atom1
.
dipole
.
x
*
qJr_2
;
real
dixqkr_2
=
atom1
.
dipole
.
x
*
qJr_1
-
atom1
.
dipole
.
y
*
qJr_0
;
real
dkxqir_0
=
atom2
.
dipole
.
y
*
qIr_2
-
atom2
.
dipole
.
z
*
qIr_1
;
real
dkxqir_1
=
atom2
.
dipole
.
z
*
qIr_0
-
atom2
.
dipole
.
x
*
qIr_2
;
real
dkxqir_2
=
atom2
.
dipole
.
x
*
qIr_1
-
atom2
.
dipole
.
y
*
qIr_0
;
real
rxqikr_0
=
yr
*
qIqJr_2
-
zr
*
qIqJr_1
;
real
rxqikr_1
=
zr
*
qIqJr_0
-
xr
*
qIqJr_2
;
real
rxqikr_2
=
xr
*
qIqJr_1
-
yr
*
qIqJr_0
;
real
rxqidk_0
=
yr
*
qIdJ_2
-
zr
*
qIdJ_1
;
real
rxqidk_1
=
zr
*
qIdJ_0
-
xr
*
qIdJ_2
;
real
rxqidk_2
=
xr
*
qIdJ_1
-
yr
*
qIdJ_0
;
real
qkrxqir_0
=
qJr_1
*
qIr_2
-
qJr_2
*
qIr_1
;
real
qkrxqir_1
=
qJr_2
*
qIr_0
-
qJr_0
*
qIr_2
;
real
qkrxqir_2
=
qJr_0
*
qIr_1
-
qJr_1
*
qIr_0
;
#endif
#if defined T1 || defined T3
real
qixqk_0
=
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleYZ
+
atom1
.
quadrupoleYZ
*
atom2quadrupoleZZ
-
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXY
-
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYY
-
atom1quadrupoleZZ
*
atom2
.
quadrupoleYZ
;
real
qixqk_1
=
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXX
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleXY
+
atom1quadrupoleZZ
*
atom2
.
quadrupoleXZ
-
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXZ
-
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleYZ
-
atom1
.
quadrupoleXZ
*
atom2quadrupoleZZ
;
real
qixqk_2
=
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleYY
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleYZ
-
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXX
-
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleXY
-
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleXZ
;
#endif
#ifdef T1
real
ttm2_0
=
-
rr3
*
dixdk_0
+
gf2
*
dixr_0
-
gf5
*
rxqir_0
+
2
*
rr5
*
(
dixqkr_0
+
dkxqir_0
+
rxqidk_0
-
2
*
qixqk_0
)
-
4
*
rr7
*
(
rxqikr_0
+
qkrxqir_0
);
real
ttm2_1
=
-
rr3
*
dixdk_1
+
gf2
*
dixr_1
-
gf5
*
rxqir_1
+
2
*
rr5
*
(
dixqkr_1
+
dkxqir_1
+
rxqidk_1
-
2
*
qixqk_1
)
-
4
*
rr7
*
(
rxqikr_1
+
qkrxqir_1
);
real
ttm2_2
=
-
rr3
*
dixdk_2
+
gf2
*
dixr_2
-
gf5
*
rxqir_2
+
2
*
rr5
*
(
dixqkr_2
+
dkxqir_2
+
rxqidk_2
-
2
*
qixqk_2
)
-
4
*
rr7
*
(
rxqikr_2
+
qkrxqir_2
);
real
ttm2i_0
=
-
(
dixuk_0
*
psc3
+
dixukp_0
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_0
+
((
ukxqir_0
+
rxqiuk_0
)
*
psc5
+
(
ukxqirp_0
+
rxqiukp_0
)
*
dsc5
)
-
gti5
*
rxqir_0
;
real
ttm2i_1
=
-
(
dixuk_1
*
psc3
+
dixukp_1
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_1
+
((
ukxqir_1
+
rxqiuk_1
)
*
psc5
+
(
ukxqirp_1
+
rxqiukp_1
)
*
dsc5
)
-
gti5
*
rxqir_1
;
real
ttm2i_2
=
-
(
dixuk_2
*
psc3
+
dixukp_2
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_2
+
((
ukxqir_2
+
rxqiuk_2
)
*
psc5
+
(
ukxqirp_2
+
rxqiukp_2
)
*
dsc5
)
-
gti5
*
rxqir_2
;
#endif
#ifdef T3
real
qJqIr_0
=
atom2
.
quadrupoleXX
*
qIr_0
+
atom2
.
quadrupoleXY
*
qIr_1
+
atom2
.
quadrupoleXZ
*
qIr_2
;
real
qJqIr_1
=
atom2
.
quadrupoleXY
*
qIr_0
+
atom2
.
quadrupoleYY
*
qIr_1
+
atom2
.
quadrupoleYZ
*
qIr_2
;
real
qJqIr_2
=
atom2
.
quadrupoleXZ
*
qIr_0
+
atom2
.
quadrupoleYZ
*
qIr_1
+
atom2quadrupoleZZ
*
qIr_2
;
real
qJdI_0
=
atom2
.
quadrupoleXX
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
z
;
real
qJdI_1
=
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
z
;
real
qJdI_2
=
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
dipole
.
z
;
real
dkxr_0
=
atom2
.
dipole
.
y
*
zr
-
atom2
.
dipole
.
z
*
yr
;
real
dkxr_1
=
atom2
.
dipole
.
z
*
xr
-
atom2
.
dipole
.
x
*
zr
;
real
dkxr_2
=
atom2
.
dipole
.
x
*
yr
-
atom2
.
dipole
.
y
*
xr
;
real
rxqkr_0
=
yr
*
qJr_2
-
zr
*
qJr_1
;
real
rxqkr_1
=
zr
*
qJr_0
-
xr
*
qJr_2
;
real
rxqkr_2
=
xr
*
qJr_1
-
yr
*
qJr_0
;
real
dixqkr_0
=
atom1
.
dipole
.
y
*
qJr_2
-
atom1
.
dipole
.
z
*
qJr_1
;
real
dixqkr_1
=
atom1
.
dipole
.
z
*
qJr_0
-
atom1
.
dipole
.
x
*
qJr_2
;
real
dixqkr_2
=
atom1
.
dipole
.
x
*
qJr_1
-
atom1
.
dipole
.
y
*
qJr_0
;
real
dkxqir_0
=
atom2
.
dipole
.
y
*
qIr_2
-
atom2
.
dipole
.
z
*
qIr_1
;
real
dkxqir_1
=
atom2
.
dipole
.
z
*
qIr_0
-
atom2
.
dipole
.
x
*
qIr_2
;
real
dkxqir_2
=
atom2
.
dipole
.
x
*
qIr_1
-
atom2
.
dipole
.
y
*
qIr_0
;
real
rxqkdi_0
=
yr
*
qJdI_2
-
zr
*
qJdI_1
;
real
rxqkdi_1
=
zr
*
qJdI_0
-
xr
*
qJdI_2
;
real
rxqkdi_2
=
xr
*
qJdI_1
-
yr
*
qJdI_0
;
real
rxqkir_0
=
yr
*
qJqIr_2
-
zr
*
qJqIr_1
;
real
rxqkir_1
=
zr
*
qJqIr_0
-
xr
*
qJqIr_2
;
real
rxqkir_2
=
xr
*
qJqIr_1
-
yr
*
qJqIr_0
;
real
qkrxqir_0
=
qJr_1
*
qIr_2
-
qJr_2
*
qIr_1
;
real
qkrxqir_1
=
qJr_2
*
qIr_0
-
qJr_0
*
qIr_2
;
real
qkrxqir_2
=
qJr_0
*
qIr_1
-
qJr_1
*
qIr_0
;
real
dkxui_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
y
;
real
dkxui_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
z
;
real
dkxui_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
x
;
real
dkxuip_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
y
;
real
dkxuip_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
z
;
real
dkxuip_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
x
;
real
uixqkrp_0
=
atom1
.
inducedDipolePolar
.
y
*
qJr_2
-
atom1
.
inducedDipolePolar
.
z
*
qJr_1
;
real
uixqkrp_1
=
atom1
.
inducedDipolePolar
.
z
*
qJr_0
-
atom1
.
inducedDipolePolar
.
x
*
qJr_2
;
real
uixqkrp_2
=
atom1
.
inducedDipolePolar
.
x
*
qJr_1
-
atom1
.
inducedDipolePolar
.
y
*
qJr_0
;
real
uixqkr_0
=
atom1
.
inducedDipole
.
y
*
qJr_2
-
atom1
.
inducedDipole
.
z
*
qJr_1
;
real
uixqkr_1
=
atom1
.
inducedDipole
.
z
*
qJr_0
-
atom1
.
inducedDipole
.
x
*
qJr_2
;
real
uixqkr_2
=
atom1
.
inducedDipole
.
x
*
qJr_1
-
atom1
.
inducedDipole
.
y
*
qJr_0
;
real
rxqkuip_0
=
yr
*
qJuIp_2
-
zr
*
qJuIp_1
;
real
rxqkuip_1
=
zr
*
qJuIp_0
-
xr
*
qJuIp_2
;
real
rxqkuip_2
=
xr
*
qJuIp_1
-
yr
*
qJuIp_0
;
real
rxqkui_0
=
yr
*
qJuI_2
-
zr
*
qJuI_1
;
real
rxqkui_1
=
zr
*
qJuI_0
-
xr
*
qJuI_2
;
real
rxqkui_2
=
xr
*
qJuI_1
-
yr
*
qJuI_0
;
real
ttm3_0
=
rr3
*
dixdk_0
+
gf3
*
dkxr_0
-
gf6
*
rxqkr_0
-
2
*
rr5
*
(
dixqkr_0
+
dkxqir_0
+
rxqkdi_0
-
2
*
qixqk_0
)
-
4
*
rr7
*
(
rxqkir_0
-
qkrxqir_0
);
real
ttm3_1
=
rr3
*
dixdk_1
+
gf3
*
dkxr_1
-
gf6
*
rxqkr_1
-
2
*
rr5
*
(
dixqkr_1
+
dkxqir_1
+
rxqkdi_1
-
2
*
qixqk_1
)
-
4
*
rr7
*
(
rxqkir_1
-
qkrxqir_1
);
real
ttm3_2
=
rr3
*
dixdk_2
+
gf3
*
dkxr_2
-
gf6
*
rxqkr_2
-
2
*
rr5
*
(
dixqkr_2
+
dkxqir_2
+
rxqkdi_2
-
2
*
qixqk_2
)
-
4
*
rr7
*
(
rxqkir_2
-
qkrxqir_2
);
real
ttm3i_0
=
-
(
dkxui_0
*
psc3
+
dkxuip_0
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_0
-
((
uixqkr_0
+
rxqkui_0
)
*
psc5
+
(
uixqkrp_0
+
rxqkuip_0
)
*
dsc5
)
-
gti6
*
rxqkr_0
;
real
ttm3i_1
=
-
(
dkxui_1
*
psc3
+
dkxuip_1
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_1
-
((
uixqkr_1
+
rxqkui_1
)
*
psc5
+
(
uixqkrp_1
+
rxqkuip_1
)
*
dsc5
)
-
gti6
*
rxqkr_1
;
real
ttm3i_2
=
-
(
dkxui_2
*
psc3
+
dkxuip_2
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_2
-
((
uixqkr_2
+
rxqkui_2
)
*
psc5
+
(
uixqkrp_2
+
rxqkuip_2
)
*
dsc5
)
-
gti6
*
rxqkr_2
;
#endif
if
(
mScale
<
1
)
{
#ifdef T1
ttm2_0
*=
mScale
;
ttm2_1
*=
mScale
;
ttm2_2
*=
mScale
;
#endif
#ifdef T3
ttm3_0
*=
mScale
;
ttm3_1
*=
mScale
;
ttm3_2
*=
mScale
;
#endif
}
#ifdef F1
outputForce
.
x
=
-
(
ftm2_0
+
ftm2i_0
);
outputForce
.
y
=
-
(
ftm2_1
+
ftm2i_1
);
outputForce
.
z
=
-
(
ftm2_2
+
ftm2i_2
);
#endif
#ifdef T1
outputForce
.
x
=
(
ttm2_0
+
ttm2i_0
);
outputForce
.
y
=
(
ttm2_1
+
ttm2i_1
);
outputForce
.
z
=
(
ttm2_2
+
ttm2i_2
);
#endif
#ifdef T3
outputForce
.
x
=
(
ttm3_0
+
ttm3i_0
);
outputForce
.
y
=
(
ttm3_1
+
ttm3i_1
);
outputForce
.
z
=
(
ttm3_2
+
ttm3i_2
);
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/electrostaticPairForceNoQuadrupoles.cu
deleted
100644 → 0
View file @
4c00b312
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real
&
energy
,
real3
&
outputForce
)
{
#elif defined T1
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#else
__device__
void
computeOneInteractionT3
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#endif
#ifdef F1
const
float
uScale
=
1
;
real
ddsc3_0
=
0
;
real
ddsc3_1
=
0
;
real
ddsc3_2
=
0
;
real
ddsc5_0
=
0
;
real
ddsc5_1
=
0
;
real
ddsc5_2
=
0
;
real
ddsc7_0
=
0
;
real
ddsc7_1
=
0
;
real
ddsc7_2
=
0
;
#endif
real
xr
=
atom2
.
posq
.
x
-
atom1
.
posq
.
x
;
real
yr
=
atom2
.
posq
.
y
-
atom1
.
posq
.
y
;
real
zr
=
atom2
.
posq
.
z
-
atom1
.
posq
.
z
;
real
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
real
r
=
SQRT
(
r2
);
real
rr1
=
RECIP
(
r
);
real
rr2
=
rr1
*
rr1
;
real
rr3
=
rr1
*
rr2
;
real
rr5
=
3
*
rr3
*
rr2
;
real
rr7
=
5
*
rr5
*
rr2
;
real
rr9
=
7
*
rr7
*
rr2
;
#ifdef F1
real
rr11
=
9
*
rr9
*
rr2
;
#endif
real
scale3
=
1
;
real
scale5
=
1
;
real
scale7
=
1
;
real
pdamp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
pdamp
!=
0
)
{
real
ratio
=
r
/
pdamp
;
float
pGamma
=
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
real
dampExp
=
EXP
(
-
damp
);
real
damp1
=
damp
+
1
;
real
damp2
=
damp
*
damp
;
scale3
=
1
-
dampExp
;
scale5
=
1
-
damp1
*
dampExp
;
scale7
=
1
-
(
damp1
+
0.6
f
*
damp2
)
*
dampExp
;
#ifdef F1
real
factor
=
3
*
damp
*
dampExp
*
rr2
;
real
factor7
=
-
0.2
f
+
0.6
f
*
damp
;
ddsc3_0
=
factor
*
xr
;
ddsc5_0
=
ddsc3_0
*
damp
;
ddsc7_0
=
ddsc5_0
*
factor7
;
ddsc3_1
=
factor
*
yr
;
ddsc5_1
=
ddsc3_1
*
damp
;
ddsc7_1
=
ddsc5_1
*
factor7
;
ddsc3_2
=
factor
*
zr
;
ddsc5_2
=
ddsc3_2
*
damp
;
ddsc7_2
=
ddsc5_2
*
factor7
;
#endif
}
#if defined F1
real
scale3i
=
rr3
*
scale3
*
uScale
;
real
scale5i
=
rr5
*
scale5
*
uScale
;
#endif
real
dsc3
=
rr3
*
scale3
*
dScale
;
real
psc3
=
rr3
*
scale3
*
pScale
;
real
dsc5
=
rr5
*
scale5
*
dScale
;
real
psc5
=
rr5
*
scale5
*
pScale
;
real
dsc7
=
rr7
*
scale7
*
dScale
;
real
psc7
=
rr7
*
scale7
*
pScale
;
#if defined F1
real
sc2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
dipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
dipole
.
z
*
atom2
.
dipole
.
z
;
#endif
#if defined F1 || defined T1
real
sc4
=
atom2
.
dipole
.
x
*
xr
+
atom2
.
dipole
.
y
*
yr
+
atom2
.
dipole
.
z
*
zr
;
#endif
#if defined F1 || defined T3
real
sc3
=
atom1
.
dipole
.
x
*
xr
+
atom1
.
dipole
.
y
*
yr
+
atom1
.
dipole
.
z
*
zr
;
#endif
#if defined F1
real
sci1
=
atom1
.
inducedDipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
dipole
.
z
;
#endif
#if defined F1 || defined T3
real
sci3
=
atom1
.
inducedDipole
.
x
*
xr
+
atom1
.
inducedDipole
.
y
*
yr
+
atom1
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1 || defined T1
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1
real
scip1
=
atom1
.
inducedDipolePolar
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipolePolar
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipolePolar
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipolePolar
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipolePolar
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipolePolar
.
z
*
atom1
.
dipole
.
z
;
real
scip2
=
atom1
.
inducedDipole
.
x
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
inducedDipolePolar
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
inducedDipolePolar
.
z
;
#endif
#if defined F1 || defined T3
real
scip3
=
((
atom1
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom1
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom1
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#if defined F1 || defined T1
real
scip4
=
((
atom2
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom2
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom2
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#ifdef F1
real
gli1
=
atom2
.
posq
.
w
*
sci3
-
atom1
.
posq
.
w
*
sci4
;
real
gli6
=
sci1
;
real
glip1
=
atom2
.
posq
.
w
*
scip3
-
atom1
.
posq
.
w
*
scip4
;
real
glip6
=
scip1
;
real
gli2
=
-
sc3
*
sci4
-
sci3
*
sc4
;
real
glip2
=
-
sc3
*
scip4
-
scip3
*
sc4
;
real
factor3
=
rr3
*
((
gli1
+
gli6
)
*
pScale
+
(
glip1
+
glip6
)
*
dScale
);
real
factor5
=
rr5
*
(
gli2
*
pScale
+
glip2
*
dScale
);
real
ftm2i_0
=
-
0.5
f
*
(
factor3
*
ddsc3_0
+
factor5
*
ddsc5_0
);
real
ftm2i_1
=
-
0.5
f
*
(
factor3
*
ddsc3_1
+
factor5
*
ddsc5_1
);
real
ftm2i_2
=
-
0.5
f
*
(
factor3
*
ddsc3_2
+
factor5
*
ddsc5_2
);
real
gl0
=
atom1
.
posq
.
w
*
atom2
.
posq
.
w
;
real
gl1
=
atom2
.
posq
.
w
*
sc3
-
atom1
.
posq
.
w
*
sc4
;
real
gl2
=
-
sc3
*
sc4
;
real
gl6
=
sc2
;
real
gf1
=
rr3
*
gl0
+
rr5
*
(
gl1
+
gl6
)
+
rr7
*
gl2
;
#endif
#if defined F1 || defined T1
real
gf2
=
-
atom2
.
posq
.
w
*
rr3
+
sc4
*
rr5
;
real
gf5
=
2
*
(
-
atom2
.
posq
.
w
*
rr5
+
sc4
*
rr7
);
#endif
#if defined F1 || defined T3
real
gf3
=
atom1
.
posq
.
w
*
rr3
+
sc3
*
rr5
;
real
gf6
=
2
*
(
-
atom1
.
posq
.
w
*
rr5
-
sc3
*
rr7
);
#endif
#ifdef F1
real
em
=
mScale
*
(
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
+
rr5
*
gl2
);
real
ei
=
0.5
f
*
((
gli1
+
gli6
)
*
psc3
+
gli2
*
psc5
);
energy
=
em
+
ei
;
#endif
#ifdef F1
real
ftm2_0
=
mScale
*
(
gf1
*
xr
+
gf2
*
atom1
.
dipole
.
x
+
gf3
*
atom2
.
dipole
.
x
);
real
ftm2_1
=
mScale
*
(
gf1
*
yr
+
gf2
*
atom1
.
dipole
.
y
+
gf3
*
atom2
.
dipole
.
y
);
real
ftm2_2
=
mScale
*
(
gf1
*
zr
+
gf2
*
atom1
.
dipole
.
z
+
gf3
*
atom2
.
dipole
.
z
);
real
gfi1
=
rr2
*
(
1.5
f
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
scale3i
)
+
2.5
f
*
(
gli2
*
psc5
+
glip2
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
scale5i
));
ftm2i_0
+=
gfi1
*
xr
;
ftm2i_1
+=
gfi1
*
yr
;
ftm2i_2
+=
gfi1
*
zr
;
#endif
#if defined F1 || defined T1
real
gfi5
=
(
sci4
*
psc7
+
scip4
*
dsc7
);
#endif
#if defined F1 || defined T3
real
gfi6
=
-
(
sci3
*
psc7
+
scip3
*
dsc7
);
#endif
#ifdef F1
ftm2i_0
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
x
*
psc3
+
atom1
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
x
*
psc5
+
atom1
.
inducedDipolePolar
.
x
*
dsc5
))
+
0.5
f
*
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
x
*
psc3
+
atom2
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
x
*
psc5
+
atom2
.
inducedDipolePolar
.
x
*
dsc5
))
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
x
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
x
;
ftm2i_1
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
y
*
psc3
+
atom1
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
y
*
psc5
+
atom1
.
inducedDipolePolar
.
y
*
dsc5
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
y
*
psc3
+
atom2
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
y
*
psc5
+
atom2
.
inducedDipolePolar
.
y
*
dsc5
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
y
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
y
;
ftm2i_2
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
z
*
psc3
+
atom1
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
z
*
psc5
+
atom1
.
inducedDipolePolar
.
z
*
dsc5
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
z
*
psc3
+
atom2
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
z
*
psc5
+
atom2
.
inducedDipolePolar
.
z
*
dsc5
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
z
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
z
;
#ifdef DIRECT_POLARIZATION
real
gfd
=
0.5
*
(
3
*
rr2
*
scip2
*
scale3i
-
5
*
rr2
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scale5i
);
real
temp5
=
0.5
*
scale5i
;
real
fdir_0
=
gfd
*
xr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
);
real
fdir_1
=
gfd
*
yr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
);
real
fdir_2
=
gfd
*
zr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
ftm2i_0
-=
fdir_0
;
ftm2i_1
-=
fdir_1
;
ftm2i_2
-=
fdir_2
;
#else
real
scaleF
=
0.5
f
*
uScale
;
real
inducedFactor3
=
scip2
*
rr3
*
scaleF
;
real
inducedFactor5
=
(
sci3
*
scip4
+
scip3
*
sci4
)
*
rr5
*
scaleF
;
real
findmp_0
=
inducedFactor3
*
ddsc3_0
-
inducedFactor5
*
ddsc5_0
;
real
findmp_1
=
inducedFactor3
*
ddsc3_1
-
inducedFactor5
*
ddsc5_1
;
real
findmp_2
=
inducedFactor3
*
ddsc3_2
-
inducedFactor5
*
ddsc5_2
;
ftm2i_0
-=
findmp_0
;
ftm2i_1
-=
findmp_1
;
ftm2i_2
-=
findmp_2
;
#endif
#endif
#if defined T1
real
gti2
=
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
);
real
gti5
=
gfi5
;
#endif
#if defined T3
real
gti3
=
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
);
real
gti6
=
gfi6
;
#endif
#if defined T1 || defined T3
real
dixdk_0
=
atom1
.
dipole
.
y
*
atom2
.
dipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
dipole
.
y
;
real
dixdk_1
=
atom1
.
dipole
.
z
*
atom2
.
dipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
dipole
.
z
;
real
dixdk_2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
dipole
.
x
;
#if defined T1
real
dixuk_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
y
;
real
dixuk_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
z
;
real
dixuk_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
x
;
#endif
#endif
#ifdef T1
real
dixukp_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
y
;
real
dixukp_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
z
;
real
dixukp_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
x
;
#endif
#ifdef T1
real
dixr_0
=
atom1
.
dipole
.
y
*
zr
-
atom1
.
dipole
.
z
*
yr
;
real
dixr_1
=
atom1
.
dipole
.
z
*
xr
-
atom1
.
dipole
.
x
*
zr
;
real
dixr_2
=
atom1
.
dipole
.
x
*
yr
-
atom1
.
dipole
.
y
*
xr
;
#endif
#ifdef T1
real
ttm2_0
=
-
rr3
*
dixdk_0
+
gf2
*
dixr_0
;
real
ttm2_1
=
-
rr3
*
dixdk_1
+
gf2
*
dixr_1
;
real
ttm2_2
=
-
rr3
*
dixdk_2
+
gf2
*
dixr_2
;
real
ttm2i_0
=
-
(
dixuk_0
*
psc3
+
dixukp_0
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_0
;
real
ttm2i_1
=
-
(
dixuk_1
*
psc3
+
dixukp_1
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_1
;
real
ttm2i_2
=
-
(
dixuk_2
*
psc3
+
dixukp_2
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_2
;
#endif
#ifdef T3
real
dkxr_0
=
atom2
.
dipole
.
y
*
zr
-
atom2
.
dipole
.
z
*
yr
;
real
dkxr_1
=
atom2
.
dipole
.
z
*
xr
-
atom2
.
dipole
.
x
*
zr
;
real
dkxr_2
=
atom2
.
dipole
.
x
*
yr
-
atom2
.
dipole
.
y
*
xr
;
real
dkxui_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
y
;
real
dkxui_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
z
;
real
dkxui_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
x
;
real
dkxuip_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
y
;
real
dkxuip_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
z
;
real
dkxuip_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
x
;
real
ttm3_0
=
rr3
*
dixdk_0
+
gf3
*
dkxr_0
;
real
ttm3_1
=
rr3
*
dixdk_1
+
gf3
*
dkxr_1
;
real
ttm3_2
=
rr3
*
dixdk_2
+
gf3
*
dkxr_2
;
real
ttm3i_0
=
-
(
dkxui_0
*
psc3
+
dkxuip_0
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_0
;
real
ttm3i_1
=
-
(
dkxui_1
*
psc3
+
dkxuip_1
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_1
;
real
ttm3i_2
=
-
(
dkxui_2
*
psc3
+
dkxuip_2
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_2
;
#endif
if
(
mScale
<
1
)
{
#ifdef T1
ttm2_0
*=
mScale
;
ttm2_1
*=
mScale
;
ttm2_2
*=
mScale
;
#endif
#ifdef T3
ttm3_0
*=
mScale
;
ttm3_1
*=
mScale
;
ttm3_2
*=
mScale
;
#endif
}
#ifdef F1
outputForce
.
x
=
-
(
ftm2_0
+
ftm2i_0
);
outputForce
.
y
=
-
(
ftm2_1
+
ftm2i_1
);
outputForce
.
z
=
-
(
ftm2_2
+
ftm2i_2
);
#endif
#ifdef T1
outputForce
.
x
=
(
ttm2_0
+
ttm2i_0
);
outputForce
.
y
=
(
ttm2_1
+
ttm2i_1
);
outputForce
.
z
=
(
ttm2_2
+
ttm2i_2
);
#endif
#ifdef T3
outputForce
.
x
=
(
ttm3_0
+
ttm3i_0
);
outputForce
.
y
=
(
ttm3_1
+
ttm3i_1
);
outputForce
.
z
=
(
ttm3_2
+
ttm3i_2
);
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
View file @
b7088b74
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real4
posq
;
real3
force
,
dipole
,
inducedDipole
,
inducedDipolePolar
;
real3
pos
,
force
,
torque
,
inducedDipole
,
inducedDipolePolar
,
sphericalDipole
;
real
q
;
float
thole
,
damp
;
#ifdef INCLUDE_QUADRUPOLES
real
quadrupoleXX
,
quadrupoleXY
,
quadrupoleXZ
;
real
quadrupoleYY
,
quadrupoleYZ
;
real
sphericalQuadrupole
[
5
];
#endif
float
thole
,
damp
;
}
AtomData
;
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real
&
energy
,
real3
&
outputForce
);
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
);
__device__
void
computeOneInteractionT3
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
);
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
data
.
posq
=
posq
[
atom
];
data
.
dipole
.
x
=
labFrameDipole
[
atom
*
3
];
data
.
dipole
.
y
=
labFrameDipole
[
atom
*
3
+
1
];
data
.
dipole
.
z
=
labFrameDipole
[
atom
*
3
+
2
];
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
sphericalDipole
,
const
real
*
__restrict__
sphericalQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
real4
atomPosq
=
posq
[
atom
];
data
.
pos
=
make_real3
(
atomPosq
.
x
,
atomPosq
.
y
,
atomPosq
.
z
);
data
.
q
=
atomPosq
.
w
;
data
.
sphericalDipole
.
x
=
sphericalDipole
[
atom
*
3
];
data
.
sphericalDipole
.
y
=
sphericalDipole
[
atom
*
3
+
1
];
data
.
sphericalDipole
.
z
=
sphericalDipole
[
atom
*
3
+
2
];
#ifdef INCLUDE_QUADRUPOLES
data
.
quadrupoleXX
=
labFrame
Quadrupole
[
atom
*
5
];
data
.
quadrupoleXY
=
labFrame
Quadrupole
[
atom
*
5
+
1
];
data
.
quadrupoleXZ
=
labFrame
Quadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrame
Quadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrame
Quadrupole
[
atom
*
5
+
4
];
data
.
sphericalQuadrupole
[
0
]
=
spherical
Quadrupole
[
atom
*
5
];
data
.
sphericalQuadrupole
[
1
]
=
spherical
Quadrupole
[
atom
*
5
+
1
];
data
.
sphericalQuadrupole
[
2
]
=
spherical
Quadrupole
[
atom
*
5
+
2
];
data
.
sphericalQuadrupole
[
3
]
=
spherical
Quadrupole
[
atom
*
5
+
3
];
data
.
sphericalQuadrupole
[
4
]
=
spherical
Quadrupole
[
atom
*
5
+
4
];
#endif
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
...
...
@@ -57,6 +54,322 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
return
(
x
&&
y
?
0.0
f
:
(
x
&&
p
?
0.5
f
:
1.0
f
));
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
)
{
// Compute the displacement.
real3
delta
;
delta
.
x
=
atom2
.
pos
.
x
-
atom1
.
pos
.
x
;
delta
.
y
=
atom2
.
pos
.
y
-
atom1
.
pos
.
y
;
delta
.
z
=
atom2
.
pos
.
z
-
atom1
.
pos
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
rInv
=
RSQRT
(
r2
);
real
r
=
r2
*
rInv
;
// Rotate the various dipoles and quadrupoles.
real
qiRotationMatrix
[
3
][
3
];
buildQIRotationMatrix
(
delta
,
rInv
,
qiRotationMatrix
);
real3
qiUindI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipole
.
z
);
real3
qiUindJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipole
.
z
);
real3
qiUinpI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipolePolar
.
z
);
real3
qiUinpJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipolePolar
.
z
);
real3
rotatedDipole1
=
rotateDipole
(
atom1
.
sphericalDipole
,
qiRotationMatrix
);
real3
rotatedDipole2
=
rotateDipole
(
atom2
.
sphericalDipole
,
qiRotationMatrix
);
real
rotatedQuadrupole1
[]
=
{
0
,
0
,
0
,
0
,
0
};
real
rotatedQuadrupole2
[]
=
{
0
,
0
,
0
,
0
,
0
};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles
(
qiRotationMatrix
,
atom1
.
sphericalQuadrupole
,
atom2
.
sphericalQuadrupole
,
rotatedQuadrupole1
,
rotatedQuadrupole2
);
#endif
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real
Vij
[
9
],
Vji
[
9
],
VjiR
[
9
],
VijR
[
9
];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real
Vijp
[
3
],
Vijd
[
3
],
Vjip
[
3
],
Vjid
[
3
];
real
rInvVec
[
7
];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec
[
1
]
=
rInv
;
for
(
int
i
=
2
;
i
<
7
;
++
i
)
rInvVec
[
i
]
=
rInvVec
[
i
-
1
]
*
rInv
;
real
dmp
=
atom1
.
damp
*
atom2
.
damp
;
real
a
=
min
(
atom1
.
thole
,
atom2
.
thole
);
real
u
=
fabs
(
dmp
)
>
1.0e-5
f
?
r
/
dmp
:
1e10
f
;
real
au3
=
a
*
u
*
u
*
u
;
real
expau3
=
au3
<
50
?
EXP
(
-
au3
)
:
0
;
real
a2u6
=
au3
*
au3
;
real
a3u9
=
a2u6
*
au3
;
// Thole damping factors for energies
real
thole_c
=
1
-
expau3
;
real
thole_d0
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
thole_d1
=
1
-
expau3
;
real
thole_q0
=
1
-
expau3
*
(
1
+
au3
+
a2u6
);
real
thole_q1
=
1
-
expau3
*
(
1
+
au3
);
// Thole damping factors for derivatives
real
dthole_c
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
dthole_d0
=
1
-
expau3
*
(
1
+
au3
+
1.5
f
*
a2u6
);
real
dthole_d1
=
1
-
expau3
*
(
1
+
au3
);
real
dthole_q0
=
1
-
expau3
*
(
1
+
au3
+
0.25
f
*
a2u6
+
0.75
f
*
a3u9
);
real
dthole_q1
=
1
-
expau3
*
(
1
+
au3
+
0.75
f
*
a2u6
);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real
ePermCoef
,
dPermCoef
,
eUIndCoef
,
dUIndCoef
,
eUInpCoef
,
dUInpCoef
;
// C-C terms (m=0)
ePermCoef
=
rInvVec
[
1
]
*
mScale
;
dPermCoef
=
-
0.5
f
*
mScale
*
rInvVec
[
2
];
Vij
[
0
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
=
ePermCoef
*
atom1
.
q
;
VijR
[
0
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
=
dPermCoef
*
atom1
.
q
;
// C-D and C-Uind terms (m=0)
ePermCoef
=
rInvVec
[
2
]
*
mScale
;
eUIndCoef
=
rInvVec
[
2
]
*
pScale
*
thole_c
;
eUInpCoef
=
rInvVec
[
2
]
*
dScale
*
thole_c
;
dPermCoef
=
-
rInvVec
[
3
]
*
mScale
;
dUIndCoef
=
-
2
*
rInvVec
[
3
]
*
pScale
*
dthole_c
;
dUInpCoef
=
-
2
*
rInvVec
[
3
]
*
dScale
*
dthole_c
;
Vij
[
0
]
+=
-
(
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
=
-
(
ePermCoef
*
atom1
.
q
);
VijR
[
0
]
+=
-
(
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
=
-
(
dPermCoef
*
atom1
.
q
);
Vjip
[
0
]
=
-
(
eUInpCoef
*
atom1
.
q
);
Vjid
[
0
]
=
-
(
eUIndCoef
*
atom1
.
q
);
// D-C and Uind-C terms (m=0)
Vij
[
1
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
=
eUInpCoef
*
atom2
.
q
;
Vijd
[
0
]
=
eUIndCoef
*
atom2
.
q
;
// D-D and D-Uind terms (m=0)
ePermCoef
=
-
2
*
rInvVec
[
3
]
*
mScale
;
eUIndCoef
=
-
2
*
rInvVec
[
3
]
*
pScale
*
thole_d0
;
eUInpCoef
=
-
2
*
rInvVec
[
3
]
*
dScale
*
thole_d0
;
dPermCoef
=
3
*
rInvVec
[
4
]
*
mScale
;
dUIndCoef
=
6
*
rInvVec
[
4
]
*
pScale
*
dthole_d0
;
dUInpCoef
=
6
*
rInvVec
[
4
]
*
dScale
*
dthole_d0
;
Vij
[
1
]
+=
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
;
Vji
[
1
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
;
VjiR
[
1
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
rotatedDipole2
.
x
;
Vijd
[
0
]
+=
eUIndCoef
*
rotatedDipole2
.
x
;
Vjip
[
0
]
+=
eUInpCoef
*
rotatedDipole1
.
x
;
Vjid
[
0
]
+=
eUIndCoef
*
rotatedDipole1
.
x
;
// D-D and D-Uind terms (m=1)
ePermCoef
=
rInvVec
[
3
]
*
mScale
;
eUIndCoef
=
rInvVec
[
3
]
*
pScale
*
thole_d1
;
eUInpCoef
=
rInvVec
[
3
]
*
dScale
*
thole_d1
;
dPermCoef
=
-
1.5
f
*
rInvVec
[
4
]
*
mScale
;
dUIndCoef
=
-
3
*
rInvVec
[
4
]
*
pScale
*
dthole_d1
;
dUInpCoef
=
-
3
*
rInvVec
[
4
]
*
dScale
*
dthole_d1
;
Vij
[
2
]
=
ePermCoef
*
rotatedDipole2
.
y
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
;
Vji
[
2
]
=
ePermCoef
*
rotatedDipole1
.
y
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
=
dPermCoef
*
rotatedDipole2
.
y
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
;
VjiR
[
2
]
=
dPermCoef
*
rotatedDipole1
.
y
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
=
ePermCoef
*
rotatedDipole2
.
z
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
;
Vji
[
3
]
=
ePermCoef
*
rotatedDipole1
.
z
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
=
dPermCoef
*
rotatedDipole2
.
z
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
;
VjiR
[
3
]
=
dPermCoef
*
rotatedDipole1
.
z
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
=
eUInpCoef
*
rotatedDipole2
.
y
;
Vijd
[
1
]
=
eUIndCoef
*
rotatedDipole2
.
y
;
Vjip
[
1
]
=
eUInpCoef
*
rotatedDipole1
.
y
;
Vjid
[
1
]
=
eUIndCoef
*
rotatedDipole1
.
y
;
Vijp
[
2
]
=
eUInpCoef
*
rotatedDipole2
.
z
;
Vijd
[
2
]
=
eUIndCoef
*
rotatedDipole2
.
z
;
Vjip
[
2
]
=
eUInpCoef
*
rotatedDipole1
.
z
;
Vjid
[
2
]
=
eUIndCoef
*
rotatedDipole1
.
z
;
// C-Q terms (m=0)
ePermCoef
=
mScale
*
rInvVec
[
3
];
dPermCoef
=
-
1.5
f
*
rInvVec
[
4
]
*
mScale
;
Vij
[
0
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
=
ePermCoef
*
atom1
.
q
;
VijR
[
0
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
=
dPermCoef
*
atom1
.
q
;
// Q-C terms (m=0)
Vij
[
4
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
+=
ePermCoef
*
rotatedQuadrupole1
[
0
];
VijR
[
4
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
+=
dPermCoef
*
rotatedQuadrupole1
[
0
];
// D-Q and Uind-Q terms (m=0)
ePermCoef
=
rInvVec
[
4
]
*
3.0
*
mScale
;
eUIndCoef
=
rInvVec
[
4
]
*
3.0
*
pScale
*
thole_q0
;
eUInpCoef
=
rInvVec
[
4
]
*
3.0
*
dScale
*
thole_q0
;
dPermCoef
=
-
6
*
rInvVec
[
5
]
*
mScale
;
dUIndCoef
=
-
12
*
rInvVec
[
5
]
*
pScale
*
dthole_q0
;
dUInpCoef
=
-
12
*
rInvVec
[
5
]
*
dScale
*
dthole_q0
;
Vij
[
1
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
0
];
Vijd
[
0
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
0
];
// Q-D and Q-Uind terms (m=0)
Vij
[
4
]
+=
-
(
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
0
]);
VijR
[
4
]
+=
-
(
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
0
]);
Vjip
[
0
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
0
]);
Vjid
[
0
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
0
]);
// D-Q and Uind-Q terms (m=1)
const
real
sqrtThree
=
SQRT
((
real
)
3
);
ePermCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
mScale
;
eUIndCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
pScale
*
thole_q1
;
eUInpCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
dScale
*
thole_q1
;
dPermCoef
=
2
*
sqrtThree
*
rInvVec
[
5
]
*
mScale
;
dUIndCoef
=
4
*
sqrtThree
*
rInvVec
[
5
]
*
pScale
*
dthole_q1
;
dUInpCoef
=
4
*
sqrtThree
*
rInvVec
[
5
]
*
dScale
*
dthole_q1
;
Vij
[
2
]
+=
ePermCoef
*
rotatedQuadrupole2
[
1
];
Vji
[
5
]
=
ePermCoef
*
rotatedDipole1
.
y
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
+=
dPermCoef
*
rotatedQuadrupole2
[
1
];
VjiR
[
5
]
=
dPermCoef
*
rotatedDipole1
.
y
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
+=
ePermCoef
*
rotatedQuadrupole2
[
2
];
Vji
[
6
]
=
ePermCoef
*
rotatedDipole1
.
z
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
+=
dPermCoef
*
rotatedQuadrupole2
[
2
];
VjiR
[
6
]
=
dPermCoef
*
rotatedDipole1
.
z
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
1
];
Vijd
[
1
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
1
];
Vijp
[
2
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
2
];
Vijd
[
2
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
2
];
// D-Q and Uind-Q terms (m=1)
Vij
[
5
]
=
-
(
ePermCoef
*
rotatedDipole2
.
y
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
);
Vji
[
2
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
1
]);
VijR
[
5
]
=
-
(
dPermCoef
*
rotatedDipole2
.
y
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
);
VjiR
[
2
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
1
]);
Vij
[
6
]
=
-
(
ePermCoef
*
rotatedDipole2
.
z
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
);
Vji
[
3
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
2
]);
VijR
[
6
]
=
-
(
dPermCoef
*
rotatedDipole2
.
z
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
);
VjiR
[
3
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
2
]);
Vjip
[
1
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
1
]);
Vjid
[
1
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
1
]);
Vjip
[
2
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
2
]);
Vjid
[
2
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
2
]);
// Q-Q terms (m=0)
ePermCoef
=
6
*
rInvVec
[
5
]
*
mScale
;
dPermCoef
=
-
15
*
rInvVec
[
6
]
*
mScale
;
Vij
[
4
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
+=
ePermCoef
*
rotatedQuadrupole1
[
0
];
VijR
[
4
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
+=
dPermCoef
*
rotatedQuadrupole1
[
0
];
// Q-Q terms (m=1)
ePermCoef
=
-
4
*
rInvVec
[
5
]
*
mScale
;
dPermCoef
=
10
*
rInvVec
[
6
]
*
mScale
;
Vij
[
5
]
+=
ePermCoef
*
rotatedQuadrupole2
[
1
];
Vji
[
5
]
+=
ePermCoef
*
rotatedQuadrupole1
[
1
];
VijR
[
5
]
+=
dPermCoef
*
rotatedQuadrupole2
[
1
];
VjiR
[
5
]
+=
dPermCoef
*
rotatedQuadrupole1
[
1
];
Vij
[
6
]
+=
ePermCoef
*
rotatedQuadrupole2
[
2
];
Vji
[
6
]
+=
ePermCoef
*
rotatedQuadrupole1
[
2
];
VijR
[
6
]
+=
dPermCoef
*
rotatedQuadrupole2
[
2
];
VjiR
[
6
]
+=
dPermCoef
*
rotatedQuadrupole1
[
2
];
// Q-Q terms (m=2)
ePermCoef
=
rInvVec
[
5
]
*
mScale
;
dPermCoef
=
-
2.5
f
*
rInvVec
[
6
]
*
mScale
;
Vij
[
7
]
=
ePermCoef
*
rotatedQuadrupole2
[
3
];
Vji
[
7
]
=
ePermCoef
*
rotatedQuadrupole1
[
3
];
VijR
[
7
]
=
dPermCoef
*
rotatedQuadrupole2
[
3
];
VjiR
[
7
]
=
dPermCoef
*
rotatedQuadrupole1
[
3
];
Vij
[
8
]
=
ePermCoef
*
rotatedQuadrupole2
[
4
];
Vji
[
8
]
=
ePermCoef
*
rotatedQuadrupole1
[
4
];
VijR
[
8
]
=
dPermCoef
*
rotatedQuadrupole2
[
4
];
VjiR
[
8
]
=
dPermCoef
*
rotatedQuadrupole1
[
4
];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy
+=
forceFactor
*
0.5
f
*
(
atom1
.
q
*
Vij
[
0
]
+
rotatedDipole1
.
x
*
Vij
[
1
]
+
rotatedDipole1
.
y
*
Vij
[
2
]
+
rotatedDipole1
.
z
*
Vij
[
3
]
+
rotatedQuadrupole1
[
0
]
*
Vij
[
4
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
5
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
6
]
+
rotatedQuadrupole1
[
3
]
*
Vij
[
7
]
+
rotatedQuadrupole1
[
4
]
*
Vij
[
8
]
+
atom2
.
q
*
Vji
[
0
]
+
rotatedDipole2
.
x
*
Vji
[
1
]
+
rotatedDipole2
.
y
*
Vji
[
2
]
+
rotatedDipole2
.
z
*
Vji
[
3
]
+
rotatedQuadrupole2
[
0
]
*
Vji
[
4
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
5
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
6
]
+
rotatedQuadrupole2
[
3
]
*
Vji
[
7
]
+
rotatedQuadrupole2
[
4
]
*
Vji
[
8
]);
real
fIZ
=
atom1
.
q
*
VijR
[
0
]
+
rotatedDipole1
.
x
*
VijR
[
1
]
+
rotatedDipole1
.
y
*
VijR
[
2
]
+
rotatedDipole1
.
z
*
VijR
[
3
]
+
rotatedQuadrupole1
[
0
]
*
VijR
[
4
]
+
rotatedQuadrupole1
[
1
]
*
VijR
[
5
]
+
rotatedQuadrupole1
[
2
]
*
VijR
[
6
]
+
rotatedQuadrupole1
[
3
]
*
VijR
[
7
]
+
rotatedQuadrupole1
[
4
]
*
VijR
[
8
];
real
fJZ
=
atom2
.
q
*
VjiR
[
0
]
+
rotatedDipole2
.
x
*
VjiR
[
1
]
+
rotatedDipole2
.
y
*
VjiR
[
2
]
+
rotatedDipole2
.
z
*
VjiR
[
3
]
+
rotatedQuadrupole2
[
0
]
*
VjiR
[
4
]
+
rotatedQuadrupole2
[
1
]
*
VjiR
[
5
]
+
rotatedQuadrupole2
[
2
]
*
VjiR
[
6
]
+
rotatedQuadrupole2
[
3
]
*
VjiR
[
7
]
+
rotatedQuadrupole2
[
4
]
*
VjiR
[
8
];
real
EIX
=
rotatedDipole1
.
z
*
Vij
[
1
]
-
rotatedDipole1
.
x
*
Vij
[
3
]
+
sqrtThree
*
rotatedQuadrupole1
[
2
]
*
Vij
[
4
]
+
rotatedQuadrupole1
[
4
]
*
Vij
[
5
]
-
(
sqrtThree
*
rotatedQuadrupole1
[
0
]
+
rotatedQuadrupole1
[
3
])
*
Vij
[
6
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
7
]
-
rotatedQuadrupole1
[
1
]
*
Vij
[
8
];
real
EIY
=
-
rotatedDipole1
.
y
*
Vij
[
1
]
+
rotatedDipole1
.
x
*
Vij
[
2
]
-
sqrtThree
*
rotatedQuadrupole1
[
1
]
*
Vij
[
4
]
+
(
sqrtThree
*
rotatedQuadrupole1
[
0
]
-
rotatedQuadrupole1
[
3
])
*
Vij
[
5
]
-
rotatedQuadrupole1
[
4
]
*
Vij
[
6
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
7
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
8
];
real
EIZ
=
-
rotatedDipole1
.
z
*
Vij
[
2
]
+
rotatedDipole1
.
y
*
Vij
[
3
]
-
rotatedQuadrupole1
[
2
]
*
Vij
[
5
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
6
]
-
2
*
rotatedQuadrupole1
[
4
]
*
Vij
[
7
]
+
2
*
rotatedQuadrupole1
[
3
]
*
Vij
[
8
];
real
EJX
=
rotatedDipole2
.
z
*
Vji
[
1
]
-
rotatedDipole2
.
x
*
Vji
[
3
]
+
sqrtThree
*
rotatedQuadrupole2
[
2
]
*
Vji
[
4
]
+
rotatedQuadrupole2
[
4
]
*
Vji
[
5
]
-
(
sqrtThree
*
rotatedQuadrupole2
[
0
]
+
rotatedQuadrupole2
[
3
])
*
Vji
[
6
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
7
]
-
rotatedQuadrupole2
[
1
]
*
Vji
[
8
];
real
EJY
=
-
rotatedDipole2
.
y
*
Vji
[
1
]
+
rotatedDipole2
.
x
*
Vji
[
2
]
-
sqrtThree
*
rotatedQuadrupole2
[
1
]
*
Vji
[
4
]
+
(
sqrtThree
*
rotatedQuadrupole2
[
0
]
-
rotatedQuadrupole2
[
3
])
*
Vji
[
5
]
-
rotatedQuadrupole2
[
4
]
*
Vji
[
6
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
7
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
8
];
real
EJZ
=
-
rotatedDipole2
.
z
*
Vji
[
2
]
+
rotatedDipole2
.
y
*
Vji
[
3
]
-
rotatedQuadrupole2
[
2
]
*
Vji
[
5
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
6
]
-
2
*
rotatedQuadrupole2
[
4
]
*
Vji
[
7
]
+
2
*
rotatedQuadrupole2
[
3
]
*
Vji
[
8
];
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real
iEIX
=
qiUinpI
.
z
*
Vijp
[
0
]
+
qiUindI
.
z
*
Vijd
[
0
]
-
qiUinpI
.
x
*
Vijp
[
2
]
-
qiUindI
.
x
*
Vijd
[
2
];
real
iEJX
=
qiUinpJ
.
z
*
Vjip
[
0
]
+
qiUindJ
.
z
*
Vjid
[
0
]
-
qiUinpJ
.
x
*
Vjip
[
2
]
-
qiUindJ
.
x
*
Vjid
[
2
];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real
iEIY
=
qiUinpI
.
x
*
Vijp
[
1
]
+
qiUindI
.
x
*
Vijd
[
1
]
-
qiUinpI
.
y
*
Vijp
[
0
]
-
qiUindI
.
y
*
Vijd
[
0
];
real
iEJY
=
qiUinpJ
.
x
*
Vjip
[
1
]
+
qiUindJ
.
x
*
Vjid
[
1
]
-
qiUinpJ
.
y
*
Vjip
[
0
]
-
qiUindJ
.
y
*
Vjid
[
0
];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real
eCoef
=
-
4
*
rInvVec
[
3
]
*
thole_d0
;
real
dCoef
=
6
*
rInvVec
[
4
]
*
dthole_d0
;
iEIX
+=
eCoef
*
(
qiUinpI
.
z
*
qiUindJ
.
x
+
qiUindI
.
z
*
qiUinpJ
.
x
);
iEJX
+=
eCoef
*
(
qiUinpJ
.
z
*
qiUindI
.
x
+
qiUindJ
.
z
*
qiUinpI
.
x
);
iEIY
-=
eCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
x
+
qiUindI
.
y
*
qiUinpJ
.
x
);
iEJY
-=
eCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
x
+
qiUindJ
.
y
*
qiUinpI
.
x
);
fIZ
+=
dCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
x
+
qiUindI
.
x
*
qiUinpJ
.
x
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
x
+
qiUindJ
.
x
*
qiUinpI
.
x
);
// Uind-Uind terms (m=1)
eCoef
=
2
*
rInvVec
[
3
]
*
thole_d1
;
dCoef
=
-
3
*
rInvVec
[
4
]
*
dthole_d1
;
iEIX
-=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
z
+
qiUindI
.
x
*
qiUinpJ
.
z
);
iEJX
-=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
z
+
qiUindJ
.
x
*
qiUinpI
.
z
);
iEIY
+=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
y
+
qiUindI
.
x
*
qiUinpJ
.
y
);
iEJY
+=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
y
+
qiUindJ
.
x
*
qiUinpI
.
y
);
fIZ
+=
dCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
y
+
qiUindI
.
y
*
qiUinpJ
.
y
+
qiUinpI
.
z
*
qiUindJ
.
z
+
qiUindI
.
z
*
qiUinpJ
.
z
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
y
+
qiUindJ
.
y
*
qiUinpI
.
y
+
qiUinpJ
.
z
*
qiUindI
.
z
+
qiUindJ
.
z
*
qiUinpI
.
z
);
#endif
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real
qiForce
[
3
]
=
{
rInv
*
(
EIY
+
EJY
+
iEIY
+
iEJY
),
-
rInv
*
(
EIX
+
EJX
+
iEIX
+
iEJX
),
-
(
fJZ
+
fIZ
)};
real
qiTorqueI
[
3
]
=
{
-
EIX
,
-
EIY
,
-
EIZ
};
real
qiTorqueJ
[
3
]
=
{
-
EJX
,
-
EJY
,
-
EJZ
};
real3
force
=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiForce
[
2
]);
atom1
.
force
+=
force
;
atom1
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueI
[
2
]);
if
(
forceFactor
==
1
)
{
atom2
.
force
-=
force
;
atom2
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueJ
[
2
]);
}
}
/**
* Compute electrostatic interactions.
*/
...
...
@@ -69,7 +382,7 @@ extern "C" __global__ void computeElectrostatics(
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#endif
const
real
*
__restrict__
labFrame
Dipole
,
const
real
*
__restrict__
labFrame
Quadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
spherical
Dipole
,
const
real
*
__restrict__
spherical
Quadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
...
...
@@ -77,7 +390,6 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
@@ -89,21 +401,23 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
y
=
tileIndices
.
y
;
AtomData
data
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
uint2
covalent
=
covalentFlags
[
pos
*
TILE_SIZE
+
tgx
];
unsigned
int
polarizationGroup
=
polarizationGroupFlags
[
pos
*
TILE_SIZE
+
tgx
];
if
(
x
==
y
)
{
// This tile is on the diagonal.
localData
[
threadIdx
.
x
].
posq
=
data
.
posq
;
localData
[
threadIdx
.
x
].
dipole
=
data
.
dipole
;
localData
[
threadIdx
.
x
].
pos
=
data
.
pos
;
localData
[
threadIdx
.
x
].
q
=
data
.
q
;
localData
[
threadIdx
.
x
].
sphericalDipole
=
data
.
sphericalDipole
;
#ifdef INCLUDE_QUADRUPOLES
localData
[
threadIdx
.
x
].
q
uadrupole
XX
=
data
.
q
uadrupole
XX
;
localData
[
threadIdx
.
x
].
q
uadrupole
XY
=
data
.
q
uadrupole
XY
;
localData
[
threadIdx
.
x
].
q
uadrupole
XZ
=
data
.
q
uadrupole
XZ
;
localData
[
threadIdx
.
x
].
q
uadrupole
YY
=
data
.
q
uadrupole
YY
;
localData
[
threadIdx
.
x
].
q
uadrupole
YZ
=
data
.
q
uadrupole
YZ
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
0
]
=
data
.
sphericalQ
uadrupole
[
0
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
1
]
=
data
.
sphericalQ
uadrupole
[
1
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
2
]
=
data
.
sphericalQ
uadrupole
[
2
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
3
]
=
data
.
sphericalQ
uadrupole
[
3
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
4
]
=
data
.
sphericalQ
uadrupole
[
4
]
;
#endif
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
...
...
@@ -115,101 +429,57 @@ extern "C" __global__ void computeElectrostatics(
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
real
tempEnergy
;
float
d
=
computeDScaleFactor
(
polarizationGroup
,
j
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
j
);
float
m
=
computeMScaleFactor
(
covalent
,
j
);
computeOneInteractionF1
(
data
,
localData
[
tbx
+
j
],
d
,
p
,
m
,
tempEnergy
,
tempForce
);
data
.
force
+=
tempForce
;
energy
+=
0.5
f
*
tempEnergy
;
computeOneInteraction
(
data
,
localData
[
tbx
+
j
],
true
,
d
,
p
,
m
,
0.5
f
,
energy
);
}
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
// Compute torques.
data
.
force
=
make_real3
(
0
);
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
float
d
=
computeDScaleFactor
(
polarizationGroup
,
j
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
j
);
float
m
=
computeMScaleFactor
(
covalent
,
j
);
computeOneInteractionT1
(
data
,
localData
[
tbx
+
j
],
d
,
p
,
m
,
tempForce
);
data
.
force
+=
tempForce
;
}
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
torqueBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
z
*
0x100000000
)));
}
else
{
// This is an off-diagonal tile.
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
real
tempEnergy
;
float
d
=
computeDScaleFactor
(
polarizationGroup
,
tj
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
tj
);
float
m
=
computeMScaleFactor
(
covalent
,
tj
);
computeOneInteractionF1
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempEnergy
,
tempForce
);
data
.
force
+=
tempForce
;
localData
[
tbx
+
tj
].
force
-=
tempForce
;
energy
+=
tempEnergy
;
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
true
,
d
,
p
,
m
,
1
,
energy
);
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
torque
*=
ENERGY_SCALE_FACTOR
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
z
*
0x100000000
)));
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0x100000000
)));
// Compute torques.
data
.
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
float
d
=
computeDScaleFactor
(
polarizationGroup
,
tj
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
tj
);
float
m
=
computeMScaleFactor
(
covalent
,
tj
);
computeOneInteractionT1
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempForce
);
data
.
force
+=
tempForce
;
computeOneInteractionT3
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempForce
);
localData
[
tbx
+
tj
].
force
+=
tempForce
;
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
z
*
0x100000000
)));
}
}
...
...
@@ -272,16 +542,18 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData
data
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
#ifdef USE_CUTOFF
unsigned
int
j
=
(
numTiles
<=
maxTiles
?
interactingAtoms
[
pos
*
TILE_SIZE
+
tgx
]
:
y
*
TILE_SIZE
+
tgx
);
#else
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
// Compute forces.
...
...
@@ -289,21 +561,24 @@ extern "C" __global__ void computeElectrostatics(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
atomIndices
[
tbx
+
tj
];
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
real
tempEnergy
;
computeOneInteractionF1
(
data
,
localData
[
tbx
+
tj
],
1
,
1
,
1
,
tempEnergy
,
tempForce
);
data
.
force
+=
tempForce
;
localData
[
tbx
+
tj
].
force
-=
tempForce
;
energy
+=
tempEnergy
;
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
false
,
1
,
1
,
1
,
1
,
energy
);
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
torque
*=
ENERGY_SCALE_FACTOR
;
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
torque
.
z
*
0x100000000
)));
#ifdef USE_CUTOFF
offset
=
atomIndices
[
threadIdx
.
x
];
#else
...
...
@@ -312,36 +587,9 @@ extern "C" __global__ void computeElectrostatics(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0x100000000
)));
// Compute torques.
data
.
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
computeOneInteractionT1
(
data
,
localData
[
tbx
+
tj
],
1
,
1
,
1
,
tempForce
);
data
.
force
+=
tempForce
;
computeOneInteractionT3
(
data
,
localData
[
tbx
+
tj
],
1
,
1
,
1
,
tempForce
);
localData
[
tbx
+
tj
].
force
+=
tempForce
;
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0x100000000
)));
#ifdef USE_CUTOFF
offset
=
atomIndices
[
threadIdx
.
x
];
#else
offset
=
y
*
TILE_SIZE
+
tgx
;
#endif
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
x
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
y
*
0x100000000
)));
atomicAdd
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
torque
.
z
*
0x100000000
)));
}
pos
++
;
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
View file @
b7088b74
...
...
@@ -196,14 +196,14 @@ extern "C" __global__ void transformPotentialToCartesianCoordinates(const real*
// Transform the potential.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
cphi
[
10
*
i
]
=
fphi
[
20
*
i
];
cphi
[
10
*
i
+
1
]
=
a
[
0
][
0
]
*
fphi
[
20
*
i
+
1
]
+
a
[
0
][
1
]
*
fphi
[
20
*
i
+
2
]
+
a
[
0
][
2
]
*
fphi
[
20
*
i
+
3
];
cphi
[
10
*
i
+
2
]
=
a
[
1
][
0
]
*
fphi
[
20
*
i
+
1
]
+
a
[
1
][
1
]
*
fphi
[
20
*
i
+
2
]
+
a
[
1
][
2
]
*
fphi
[
20
*
i
+
3
];
cphi
[
10
*
i
+
3
]
=
a
[
2
][
0
]
*
fphi
[
20
*
i
+
1
]
+
a
[
2
][
1
]
*
fphi
[
20
*
i
+
2
]
+
a
[
2
][
2
]
*
fphi
[
20
*
i
+
3
];
cphi
[
10
*
i
]
=
fphi
[
i
];
cphi
[
10
*
i
+
1
]
=
a
[
0
][
0
]
*
fphi
[
i
+
NUM_ATOMS
*
1
]
+
a
[
0
][
1
]
*
fphi
[
i
+
NUM_ATOMS
*
2
]
+
a
[
0
][
2
]
*
fphi
[
i
+
NUM_ATOMS
*
3
];
cphi
[
10
*
i
+
2
]
=
a
[
1
][
0
]
*
fphi
[
i
+
NUM_ATOMS
*
1
]
+
a
[
1
][
1
]
*
fphi
[
i
+
NUM_ATOMS
*
2
]
+
a
[
1
][
2
]
*
fphi
[
i
+
NUM_ATOMS
*
3
];
cphi
[
10
*
i
+
3
]
=
a
[
2
][
0
]
*
fphi
[
i
+
NUM_ATOMS
*
1
]
+
a
[
2
][
1
]
*
fphi
[
i
+
NUM_ATOMS
*
2
]
+
a
[
2
][
2
]
*
fphi
[
i
+
NUM_ATOMS
*
3
];
for
(
int
j
=
0
;
j
<
6
;
j
++
)
{
cphi
[
10
*
i
+
4
+
j
]
=
0
;
for
(
int
k
=
0
;
k
<
6
;
k
++
)
cphi
[
10
*
i
+
4
+
j
]
+=
b
[
j
][
k
]
*
fphi
[
20
*
i
+
4
+
k
];
cphi
[
10
*
i
+
4
+
j
]
+=
b
[
j
][
k
]
*
fphi
[
i
+
NUM_ATOMS
*
(
4
+
k
)
];
}
}
}
...
...
@@ -211,20 +211,32 @@ extern "C" __global__ void transformPotentialToCartesianCoordinates(const real*
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
// We have shared memory to spare, and putting the workspace array there reduces the load on L2 cache.
__shared__
real
sharedArray
[
PME_ORDER
*
PME_ORDER
*
64
];
real
*
array
=
&
sharedArray
[
PME_ORDER
*
PME_ORDER
*
threadIdx
.
x
];
#endif
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
for
(
int
m
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
m
<
NUM_ATOMS
;
m
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
m
];
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVecX
.
z
+
0.5
f
);
real
atomCharge
=
pos
.
w
;
real
atomDipoleX
=
fracDipole
[
m
*
3
];
real
atomDipoleY
=
fracDipole
[
m
*
3
+
1
];
real
atomDipoleZ
=
fracDipole
[
m
*
3
+
2
];
real
atomQuadrupoleXX
=
fracQuadrupole
[
m
*
6
];
real
atomQuadrupoleXY
=
fracQuadrupole
[
m
*
6
+
1
];
real
atomQuadrupoleXZ
=
fracQuadrupole
[
m
*
6
+
2
];
real
atomQuadrupoleYY
=
fracQuadrupole
[
m
*
6
+
3
];
real
atomQuadrupoleYZ
=
fracQuadrupole
[
m
*
6
+
4
];
real
atomQuadrupoleZZ
=
fracQuadrupole
[
m
*
6
+
5
];
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
...
...
@@ -271,16 +283,6 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
int
index
=
ybase
+
zindex
;
real4
v
=
theta3
[
iz
];
real
atomCharge
=
pos
.
w
;
real
atomDipoleX
=
fracDipole
[
m
*
3
];
real
atomDipoleY
=
fracDipole
[
m
*
3
+
1
];
real
atomDipoleZ
=
fracDipole
[
m
*
3
+
2
];
real
atomQuadrupoleXX
=
fracQuadrupole
[
m
*
6
];
real
atomQuadrupoleXY
=
fracQuadrupole
[
m
*
6
+
1
];
real
atomQuadrupoleXZ
=
fracQuadrupole
[
m
*
6
+
2
];
real
atomQuadrupoleYY
=
fracQuadrupole
[
m
*
6
+
3
];
real
atomQuadrupoleYZ
=
fracQuadrupole
[
m
*
6
+
4
];
real
atomQuadrupoleZZ
=
fracQuadrupole
[
m
*
6
+
5
];
real
term0
=
atomCharge
*
u
.
x
*
v
.
x
+
atomDipoleY
*
u
.
y
*
v
.
x
+
atomDipoleZ
*
u
.
x
*
v
.
y
+
atomQuadrupoleYY
*
u
.
z
*
v
.
x
+
atomQuadrupoleZZ
*
u
.
x
*
v
.
z
+
atomQuadrupoleYZ
*
u
.
y
*
v
.
y
;
real
term1
=
atomDipoleX
*
u
.
x
*
v
.
x
+
atomQuadrupoleXY
*
u
.
y
*
v
.
x
+
atomQuadrupoleXZ
*
u
.
x
*
v
.
y
;
real
term2
=
atomQuadrupoleXX
*
u
.
x
*
v
.
x
;
...
...
@@ -300,7 +302,13 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
// We have shared memory to spare, and putting the workspace array there reduces the load on L2 cache.
__shared__
real
sharedArray
[
PME_ORDER
*
PME_ORDER
*
64
];
real
*
array
=
&
sharedArray
[
PME_ORDER
*
PME_ORDER
*
threadIdx
.
x
];
#endif
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
...
...
@@ -318,15 +326,19 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
}
__syncthreads
();
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
for
(
int
m
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
m
<
NUM_ATOMS
;
m
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
m
];
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVecZ
.
z
+
0.5
f
);
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
);
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVecX
.
z
+
0.5
f
);
real3
cinducedDipole
=
((
const
real3
*
)
inducedDipole
)[
m
];
real3
cinducedDipolePolar
=
((
const
real3
*
)
inducedDipolePolar
)[
m
];
real3
finducedDipole
=
make_real3
(
cinducedDipole
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
0
][
2
],
cinducedDipole
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
1
][
2
],
cinducedDipole
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
2
][
2
]);
real3
finducedDipolePolar
=
make_real3
(
cinducedDipolePolar
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
0
][
2
],
cinducedDipolePolar
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
1
][
2
],
cinducedDipolePolar
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
2
][
2
]);
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
...
...
@@ -373,14 +385,6 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
int
index
=
ybase
+
zindex
;
real4
v
=
theta3
[
iz
];
real3
cinducedDipole
=
make_real3
(
inducedDipole
[
m
*
3
],
inducedDipole
[
m
*
3
+
1
],
inducedDipole
[
m
*
3
+
2
]);
real3
cinducedDipolePolar
=
make_real3
(
inducedDipolePolar
[
m
*
3
],
inducedDipolePolar
[
m
*
3
+
1
],
inducedDipolePolar
[
m
*
3
+
2
]);
real3
finducedDipole
=
make_real3
(
cinducedDipole
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
0
][
2
],
cinducedDipole
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
1
][
2
],
cinducedDipole
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
2
][
2
]);
real3
finducedDipolePolar
=
make_real3
(
cinducedDipolePolar
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
0
][
2
],
cinducedDipolePolar
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
1
][
2
],
cinducedDipolePolar
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
2
][
2
]);
real
term01
=
finducedDipole
.
y
*
u
.
y
*
v
.
x
+
finducedDipole
.
z
*
u
.
x
*
v
.
y
;
real
term11
=
finducedDipole
.
x
*
u
.
x
*
v
.
x
;
real
term02
=
finducedDipolePolar
.
y
*
u
.
y
*
v
.
x
+
finducedDipolePolar
.
z
*
u
.
x
*
v
.
y
;
...
...
@@ -448,7 +452,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
long
long
*
__restrict__
fieldBuffers
,
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
// We have shared memory to spare, and putting the workspace array there reduces the load on L2 cache.
__shared__
real
sharedArray
[
PME_ORDER
*
PME_ORDER
*
64
];
real
*
array
=
&
sharedArray
[
PME_ORDER
*
PME_ORDER
*
threadIdx
.
x
];
#endif
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
...
...
@@ -582,26 +592,26 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
tuv012
+=
tu01
*
v
.
z
;
tuv111
+=
tu11
*
v
.
y
;
}
phi
[
20
*
m
]
=
tuv000
;
phi
[
20
*
m
+
1
]
=
tuv100
;
phi
[
20
*
m
+
2
]
=
tuv010
;
phi
[
20
*
m
+
3
]
=
tuv001
;
phi
[
20
*
m
+
4
]
=
tuv200
;
phi
[
20
*
m
+
5
]
=
tuv020
;
phi
[
20
*
m
+
6
]
=
tuv002
;
phi
[
20
*
m
+
7
]
=
tuv110
;
phi
[
20
*
m
+
8
]
=
tuv101
;
phi
[
20
*
m
+
9
]
=
tuv011
;
phi
[
20
*
m
+
10
]
=
tuv300
;
phi
[
20
*
m
+
11
]
=
tuv030
;
phi
[
20
*
m
+
12
]
=
tuv003
;
phi
[
20
*
m
+
13
]
=
tuv210
;
phi
[
20
*
m
+
14
]
=
tuv201
;
phi
[
20
*
m
+
15
]
=
tuv120
;
phi
[
20
*
m
+
16
]
=
tuv021
;
phi
[
20
*
m
+
17
]
=
tuv102
;
phi
[
20
*
m
+
18
]
=
tuv012
;
phi
[
20
*
m
+
19
]
=
tuv111
;
phi
[
m
]
=
tuv000
;
phi
[
m
+
NUM_ATOMS
]
=
tuv100
;
phi
[
m
+
NUM_ATOMS
*
2
]
=
tuv010
;
phi
[
m
+
NUM_ATOMS
*
3
]
=
tuv001
;
phi
[
m
+
NUM_ATOMS
*
4
]
=
tuv200
;
phi
[
m
+
NUM_ATOMS
*
5
]
=
tuv020
;
phi
[
m
+
NUM_ATOMS
*
6
]
=
tuv002
;
phi
[
m
+
NUM_ATOMS
*
7
]
=
tuv110
;
phi
[
m
+
NUM_ATOMS
*
8
]
=
tuv101
;
phi
[
m
+
NUM_ATOMS
*
9
]
=
tuv011
;
phi
[
m
+
NUM_ATOMS
*
10
]
=
tuv300
;
phi
[
m
+
NUM_ATOMS
*
11
]
=
tuv030
;
phi
[
m
+
NUM_ATOMS
*
12
]
=
tuv003
;
phi
[
m
+
NUM_ATOMS
*
13
]
=
tuv210
;
phi
[
m
+
NUM_ATOMS
*
14
]
=
tuv201
;
phi
[
m
+
NUM_ATOMS
*
15
]
=
tuv120
;
phi
[
m
+
NUM_ATOMS
*
16
]
=
tuv021
;
phi
[
m
+
NUM_ATOMS
*
17
]
=
tuv102
;
phi
[
m
+
NUM_ATOMS
*
18
]
=
tuv012
;
phi
[
m
+
NUM_ATOMS
*
19
]
=
tuv111
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
tuv100
*
fracToCart
[
0
][
0
]
-
tuv010
*
fracToCart
[
0
][
1
]
-
tuv001
*
fracToCart
[
0
][
2
])
*
0x100000000
);
fieldBuffers
[
m
]
=
fieldx
;
...
...
@@ -619,7 +629,13 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
real
*
__restrict__
phip
,
real
*
__restrict__
phidp
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
#if __CUDA_ARCH__ < 500
real
array
[
PME_ORDER
*
PME_ORDER
];
#else
// We have shared memory to spare, and putting the workspace array there reduces the load on L2 cache.
__shared__
real
sharedArray
[
PME_ORDER
*
PME_ORDER
*
64
];
real
*
array
=
&
sharedArray
[
PME_ORDER
*
PME_ORDER
*
threadIdx
.
x
];
#endif
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
...
...
@@ -812,55 +828,55 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
tuv012
+=
tu01
*
v
.
z
;
tuv111
+=
tu11
*
v
.
y
;
}
phid
[
10
*
m
]
=
0
;
phid
[
10
*
m
+
1
]
=
tuv100_1
;
phid
[
10
*
m
+
2
]
=
tuv010_1
;
phid
[
10
*
m
+
3
]
=
tuv001_1
;
phid
[
10
*
m
+
4
]
=
tuv200_1
;
phid
[
10
*
m
+
5
]
=
tuv020_1
;
phid
[
10
*
m
+
6
]
=
tuv002_1
;
phid
[
10
*
m
+
7
]
=
tuv110_1
;
phid
[
10
*
m
+
8
]
=
tuv101_1
;
phid
[
10
*
m
+
9
]
=
tuv011_1
;
phip
[
10
*
m
]
=
0
;
phip
[
10
*
m
+
1
]
=
tuv100_2
;
phip
[
10
*
m
+
2
]
=
tuv010_2
;
phip
[
10
*
m
+
3
]
=
tuv001_2
;
phip
[
10
*
m
+
4
]
=
tuv200_2
;
phip
[
10
*
m
+
5
]
=
tuv020_2
;
phip
[
10
*
m
+
6
]
=
tuv002_2
;
phip
[
10
*
m
+
7
]
=
tuv110_2
;
phip
[
10
*
m
+
8
]
=
tuv101_2
;
phip
[
10
*
m
+
9
]
=
tuv011_2
;
phidp
[
20
*
m
]
=
tuv000
;
phidp
[
20
*
m
+
1
]
=
tuv100
;
phidp
[
20
*
m
+
2
]
=
tuv010
;
phidp
[
20
*
m
+
3
]
=
tuv001
;
phidp
[
20
*
m
+
4
]
=
tuv200
;
phidp
[
20
*
m
+
5
]
=
tuv020
;
phidp
[
20
*
m
+
6
]
=
tuv002
;
phidp
[
20
*
m
+
7
]
=
tuv110
;
phidp
[
20
*
m
+
8
]
=
tuv101
;
phidp
[
20
*
m
+
9
]
=
tuv011
;
phidp
[
20
*
m
+
10
]
=
tuv300
;
phidp
[
20
*
m
+
11
]
=
tuv030
;
phidp
[
20
*
m
+
12
]
=
tuv003
;
phidp
[
20
*
m
+
13
]
=
tuv210
;
phidp
[
20
*
m
+
14
]
=
tuv201
;
phidp
[
20
*
m
+
15
]
=
tuv120
;
phidp
[
20
*
m
+
16
]
=
tuv021
;
phidp
[
20
*
m
+
17
]
=
tuv102
;
phidp
[
20
*
m
+
18
]
=
tuv012
;
phidp
[
20
*
m
+
19
]
=
tuv111
;
phid
[
m
]
=
0
;
phid
[
m
+
NUM_ATOMS
]
=
tuv100_1
;
phid
[
m
+
NUM_ATOMS
*
2
]
=
tuv010_1
;
phid
[
m
+
NUM_ATOMS
*
3
]
=
tuv001_1
;
phid
[
m
+
NUM_ATOMS
*
4
]
=
tuv200_1
;
phid
[
m
+
NUM_ATOMS
*
5
]
=
tuv020_1
;
phid
[
m
+
NUM_ATOMS
*
6
]
=
tuv002_1
;
phid
[
m
+
NUM_ATOMS
*
7
]
=
tuv110_1
;
phid
[
m
+
NUM_ATOMS
*
8
]
=
tuv101_1
;
phid
[
m
+
NUM_ATOMS
*
9
]
=
tuv011_1
;
phip
[
m
]
=
0
;
phip
[
m
+
NUM_ATOMS
]
=
tuv100_2
;
phip
[
m
+
NUM_ATOMS
*
2
]
=
tuv010_2
;
phip
[
m
+
NUM_ATOMS
*
3
]
=
tuv001_2
;
phip
[
m
+
NUM_ATOMS
*
4
]
=
tuv200_2
;
phip
[
m
+
NUM_ATOMS
*
5
]
=
tuv020_2
;
phip
[
m
+
NUM_ATOMS
*
6
]
=
tuv002_2
;
phip
[
m
+
NUM_ATOMS
*
7
]
=
tuv110_2
;
phip
[
m
+
NUM_ATOMS
*
8
]
=
tuv101_2
;
phip
[
m
+
NUM_ATOMS
*
9
]
=
tuv011_2
;
phidp
[
m
]
=
tuv000
;
phidp
[
m
+
NUM_ATOMS
*
1
]
=
tuv100
;
phidp
[
m
+
NUM_ATOMS
*
2
]
=
tuv010
;
phidp
[
m
+
NUM_ATOMS
*
3
]
=
tuv001
;
phidp
[
m
+
NUM_ATOMS
*
4
]
=
tuv200
;
phidp
[
m
+
NUM_ATOMS
*
5
]
=
tuv020
;
phidp
[
m
+
NUM_ATOMS
*
6
]
=
tuv002
;
phidp
[
m
+
NUM_ATOMS
*
7
]
=
tuv110
;
phidp
[
m
+
NUM_ATOMS
*
8
]
=
tuv101
;
phidp
[
m
+
NUM_ATOMS
*
9
]
=
tuv011
;
phidp
[
m
+
NUM_ATOMS
*
10
]
=
tuv300
;
phidp
[
m
+
NUM_ATOMS
*
11
]
=
tuv030
;
phidp
[
m
+
NUM_ATOMS
*
12
]
=
tuv003
;
phidp
[
m
+
NUM_ATOMS
*
13
]
=
tuv210
;
phidp
[
m
+
NUM_ATOMS
*
14
]
=
tuv201
;
phidp
[
m
+
NUM_ATOMS
*
15
]
=
tuv120
;
phidp
[
m
+
NUM_ATOMS
*
16
]
=
tuv021
;
phidp
[
m
+
NUM_ATOMS
*
17
]
=
tuv102
;
phidp
[
m
+
NUM_ATOMS
*
18
]
=
tuv012
;
phidp
[
m
+
NUM_ATOMS
*
19
]
=
tuv111
;
}
}
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
phi
_global
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
...
...
@@ -922,13 +938,12 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole
[
8
]
=
fracQuadrupole
[
i
*
6
+
2
];
multipole
[
9
]
=
fracQuadrupole
[
i
*
6
+
4
];
const
real
*
phi
=
&
phi_global
[
20
*
i
];
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
energy
+=
multipole
[
k
]
*
phi
[
k
];
f
.
x
+=
multipole
[
k
]
*
phi
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phi
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phi
[
deriv3
[
k
]];
energy
+=
multipole
[
k
]
*
phi
[
i
+
NUM_ATOMS
*
k
];
f
.
x
+=
multipole
[
k
]
*
phi
[
i
+
NUM_ATOMS
*
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phi
[
i
+
NUM_ATOMS
*
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phi
[
i
+
NUM_ATOMS
*
deriv3
[
k
]];
}
f
=
make_real4
(
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
...
...
@@ -944,8 +959,8 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
phi
_global
,
const
real
*
__restrict__
phid
_global
,
const
real
*
__restrict__
phip
_global
,
const
real
*
__restrict__
phidp
_global
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
*
__restrict__
phi
,
const
real
*
__restrict__
phid
,
const
real
*
__restrict__
phip
,
const
real
*
__restrict__
phidp
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
real
cinducedDipole
[
3
],
inducedDipole
[
3
];
real
cinducedDipolePolar
[
3
],
inducedDipolePolar
[
3
];
...
...
@@ -1023,34 +1038,30 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
inducedDipolePolar
[
0
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
0
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
0
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
0
];
inducedDipolePolar
[
1
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
1
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
1
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
1
];
inducedDipolePolar
[
2
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
2
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
2
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
2
];
const
real
*
phi
=
&
phi_global
[
20
*
i
];
const
real
*
phip
=
&
phip_global
[
10
*
i
];
const
real
*
phid
=
&
phid_global
[
10
*
i
];
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
energy
+=
inducedDipole
[
0
]
*
phi
[
1
];
energy
+=
inducedDipole
[
1
]
*
phi
[
2
];
energy
+=
inducedDipole
[
2
]
*
phi
[
3
];
energy
+=
inducedDipole
[
0
]
*
phi
[
i
+
NUM_ATOMS
];
energy
+=
inducedDipole
[
1
]
*
phi
[
i
+
NUM_ATOMS
*
2
];
energy
+=
inducedDipole
[
2
]
*
phi
[
i
+
NUM_ATOMS
*
3
];
for
(
int
k
=
0
;
k
<
3
;
k
++
)
{
int
j1
=
deriv1
[
k
+
1
];
int
j2
=
deriv2
[
k
+
1
];
int
j3
=
deriv3
[
k
+
1
];
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j1
];
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j2
];
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j3
];
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
i
+
NUM_ATOMS
*
j1
];
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
i
+
NUM_ATOMS
*
j2
];
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
i
+
NUM_ATOMS
*
j3
];
#ifndef DIRECT_POLARIZATION
f
.
x
+=
(
inducedDipole
[
k
]
*
phip
[
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
j1
]);
f
.
y
+=
(
inducedDipole
[
k
]
*
phip
[
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
j2
]);
f
.
z
+=
(
inducedDipole
[
k
]
*
phip
[
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
j3
]);
f
.
x
+=
(
inducedDipole
[
k
]
*
phip
[
i
+
NUM_ATOMS
*
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
i
+
NUM_ATOMS
*
j1
]);
f
.
y
+=
(
inducedDipole
[
k
]
*
phip
[
i
+
NUM_ATOMS
*
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
i
+
NUM_ATOMS
*
j2
]);
f
.
z
+=
(
inducedDipole
[
k
]
*
phip
[
i
+
NUM_ATOMS
*
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
i
+
NUM_ATOMS
*
j3
]);
#endif
}
const
real
*
phidp
=
&
phidp_global
[
20
*
i
];
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
f
.
x
+=
multipole
[
k
]
*
phidp
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phidp
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phidp
[
deriv3
[
k
]];
f
.
x
+=
multipole
[
k
]
*
phidp
[
i
+
NUM_ATOMS
*
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phidp
[
i
+
NUM_ATOMS
*
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phidp
[
i
+
NUM_ATOMS
*
deriv3
[
k
]];
}
f
=
make_real4
(
0.5
f
*
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
0.5
f
*
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
...
...
@@ -1078,11 +1089,11 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
}
__syncthreads
();
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
inducedField
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
0
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
0
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
0
][
2
]));
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
1
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
1
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
1
][
2
]));
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
2
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
2
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
2
][
2
]));
inducedFieldPolar
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
0
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
0
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
0
][
2
]));
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
1
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
1
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
1
][
2
]));
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
2
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
2
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
2
][
2
]));
inducedField
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]));
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]));
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]));
inducedFieldPolar
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]));
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]));
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]));
}
}
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
View file @
b7088b74
extern
"C"
__global__
void
computeLabFrameMoments
(
real4
*
__restrict__
posq
,
int4
*
__restrict__
multipoleParticles
,
float
*
__restrict__
molecularDipoles
,
float
*
__restrict__
molecularQuadrupoles
,
real
*
__restrict__
labFrameDipoles
,
real
*
__restrict__
labFrameQuadrupoles
)
{
// get coordinates of this atom and the z & x axis atoms
// compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
// this atom and the axis atom
// this atom is referred to as the k-atom in notes below
// code common to ZThenX and Bisector
float
*
__restrict__
molecularQuadrupoles
,
real
*
__restrict__
labFrameDipoles
,
real
*
__restrict__
labFrameQuadrupoles
,
real
*
__restrict__
sphericalDipoles
,
real
*
__restrict__
sphericalQuadrupoles
)
{
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
gridDim
.
x
*
blockDim
.
x
)
{
// Load the spherical multipoles.
int
offset
=
3
*
atom
;
sphericalDipoles
[
offset
+
0
]
=
molecularDipoles
[
offset
+
2
];
// z -> Q_10
sphericalDipoles
[
offset
+
1
]
=
molecularDipoles
[
offset
+
0
];
// x -> Q_11c
sphericalDipoles
[
offset
+
2
]
=
molecularDipoles
[
offset
+
1
];
// y -> Q_11s
offset
=
5
*
atom
;
sphericalQuadrupoles
[
offset
+
0
]
=
-
3.0
f
*
(
molecularQuadrupoles
[
offset
+
0
]
+
molecularQuadrupoles
[
offset
+
3
]);
// zz -> Q_20
sphericalQuadrupoles
[
offset
+
1
]
=
(
2
*
SQRT
((
real
)
3
))
*
molecularQuadrupoles
[
offset
+
2
];
// xz -> Q_21c
sphericalQuadrupoles
[
offset
+
2
]
=
(
2
*
SQRT
((
real
)
3
))
*
molecularQuadrupoles
[
offset
+
4
];
// yz -> Q_21s
sphericalQuadrupoles
[
offset
+
3
]
=
SQRT
((
real
)
3
)
*
(
molecularQuadrupoles
[
offset
+
0
]
-
molecularQuadrupoles
[
offset
+
3
]);
// xx-yy -> Q_22c
sphericalQuadrupoles
[
offset
+
4
]
=
(
2
*
SQRT
((
real
)
3
))
*
molecularQuadrupoles
[
offset
+
1
];
// xy -> Q_22s
// get coordinates of this atom and the z & x axis atoms
// compute the vector between the atoms and 1/sqrt(d2), d2 is distance between
// this atom and the axis atom
// this atom is referred to as the k-atom in notes below
// code common to ZThenX and Bisector
int4
particles
=
multipoleParticles
[
atom
];
if
(
particles
.
x
>=
0
&&
particles
.
z
>=
0
)
{
real4
thisParticlePos
=
posq
[
atom
];
...
...
@@ -149,7 +163,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Transform the dipole
unsigned
int
offset
=
3
*
atom
;
offset
=
3
*
atom
;
real
molDipole
[
3
];
molDipole
[
0
]
=
molecularDipoles
[
offset
];
molDipole
[
1
]
=
molecularDipoles
[
offset
+
1
];
...
...
@@ -192,6 +206,67 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
)
+
vectorY
.
y
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
)
+
vectorZ
.
y
*
(
vectorX
.
z
*
mPoleXZ
+
vectorY
.
z
*
mPoleYZ
+
vectorZ
.
z
*
mPoleZZ
);
// ---------------------------------------------------------------------------------------
// Now transform the spherical multipoles. First do the dipoles.
offset
=
3
*
atom
;
real
sphericalDipole
[
3
];
sphericalDipole
[
0
]
=
sphericalDipoles
[
offset
];
sphericalDipole
[
1
]
=
sphericalDipoles
[
offset
+
1
];
sphericalDipole
[
2
]
=
sphericalDipoles
[
offset
+
2
];
if
(
reverse
)
sphericalDipole
[
2
]
*=
-
1
;
sphericalDipoles
[
offset
]
=
sphericalDipole
[
0
]
*
vectorZ
.
z
+
sphericalDipole
[
1
]
*
vectorX
.
z
+
sphericalDipole
[
2
]
*
vectorY
.
z
;
sphericalDipoles
[
offset
+
1
]
=
sphericalDipole
[
0
]
*
vectorZ
.
x
+
sphericalDipole
[
1
]
*
vectorX
.
x
+
sphericalDipole
[
2
]
*
vectorY
.
x
;
sphericalDipoles
[
offset
+
2
]
=
sphericalDipole
[
0
]
*
vectorZ
.
y
+
sphericalDipole
[
1
]
*
vectorX
.
y
+
sphericalDipole
[
2
]
*
vectorY
.
y
;
// Now the quadrupoles.
offset
=
5
*
atom
;
real
sphericalQuadrupole
[
5
];
sphericalQuadrupole
[
0
]
=
sphericalQuadrupoles
[
offset
];
sphericalQuadrupole
[
1
]
=
sphericalQuadrupoles
[
offset
+
1
];
sphericalQuadrupole
[
2
]
=
sphericalQuadrupoles
[
offset
+
2
];
sphericalQuadrupole
[
3
]
=
sphericalQuadrupoles
[
offset
+
3
];
sphericalQuadrupole
[
4
]
=
sphericalQuadrupoles
[
offset
+
4
];
if
(
reverse
)
{
sphericalQuadrupole
[
2
]
*=
-
1
;
sphericalQuadrupole
[
4
]
*=
-
1
;
}
real
rotatedQuadrupole
[
5
]
=
{
0
,
0
,
0
,
0
,
0
};
real
sqrtThree
=
SQRT
((
real
)
3
);
rotatedQuadrupole
[
0
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
(
3.0
f
*
vectorZ
.
z
*
vectorZ
.
z
-
1.0
f
)
+
sphericalQuadrupole
[
1
]
*
sqrtThree
*
vectorZ
.
z
*
vectorX
.
z
+
sphericalQuadrupole
[
2
]
*
sqrtThree
*
vectorZ
.
z
*
vectorY
.
z
+
sphericalQuadrupole
[
3
]
*
0.5
f
*
sqrtThree
*
(
vectorX
.
z
*
vectorX
.
z
-
vectorY
.
z
*
vectorY
.
z
)
+
sphericalQuadrupole
[
4
]
*
sqrtThree
*
vectorX
.
z
*
vectorY
.
z
;
rotatedQuadrupole
[
1
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vectorZ
.
x
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
x
*
vectorX
.
z
+
vectorZ
.
z
*
vectorX
.
x
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
x
*
vectorY
.
z
+
vectorZ
.
z
*
vectorY
.
x
)
+
sphericalQuadrupole
[
3
]
*
(
vectorX
.
z
*
vectorX
.
x
-
vectorY
.
z
*
vectorY
.
x
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
x
*
vectorY
.
z
+
vectorX
.
z
*
vectorY
.
x
);
rotatedQuadrupole
[
2
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
z
*
vectorZ
.
y
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
y
*
vectorX
.
z
+
vectorZ
.
z
*
vectorX
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
y
*
vectorY
.
z
+
vectorZ
.
z
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
(
vectorX
.
z
*
vectorX
.
y
-
vectorY
.
z
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
y
*
vectorY
.
z
+
vectorX
.
z
*
vectorY
.
y
);
rotatedQuadrupole
[
3
]
+=
sphericalQuadrupole
[
0
]
*
0.5
f
*
sqrtThree
*
(
vectorZ
.
x
*
vectorZ
.
x
-
vectorZ
.
y
*
vectorZ
.
y
)
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
x
*
vectorX
.
x
-
vectorZ
.
y
*
vectorX
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
x
*
vectorY
.
x
-
vectorZ
.
y
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
0.5
f
*
(
vectorX
.
x
*
vectorX
.
x
-
vectorX
.
y
*
vectorX
.
y
-
vectorY
.
x
*
vectorY
.
x
+
vectorY
.
y
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
x
*
vectorY
.
x
-
vectorX
.
y
*
vectorY
.
y
);
rotatedQuadrupole
[
4
]
+=
sphericalQuadrupole
[
0
]
*
sqrtThree
*
vectorZ
.
x
*
vectorZ
.
y
+
sphericalQuadrupole
[
1
]
*
(
vectorZ
.
y
*
vectorX
.
x
+
vectorZ
.
x
*
vectorX
.
y
)
+
sphericalQuadrupole
[
2
]
*
(
vectorZ
.
y
*
vectorY
.
x
+
vectorZ
.
x
*
vectorY
.
y
)
+
sphericalQuadrupole
[
3
]
*
(
vectorX
.
x
*
vectorX
.
y
-
vectorY
.
x
*
vectorY
.
y
)
+
sphericalQuadrupole
[
4
]
*
(
vectorX
.
y
*
vectorY
.
x
+
vectorX
.
x
*
vectorY
.
y
);
sphericalQuadrupoles
[
offset
]
=
rotatedQuadrupole
[
0
];
sphericalQuadrupoles
[
offset
+
1
]
=
rotatedQuadrupole
[
1
];
sphericalQuadrupoles
[
offset
+
2
]
=
rotatedQuadrupole
[
2
];
sphericalQuadrupoles
[
offset
+
3
]
=
rotatedQuadrupole
[
3
];
sphericalQuadrupoles
[
offset
+
4
]
=
rotatedQuadrupole
[
4
];
}
else
{
labFrameDipoles
[
3
*
atom
]
=
molecularDipoles
[
3
*
atom
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeElectrostaticPairForce.cu
deleted
100644 → 0
View file @
4c00b312
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionF1
(
#else
computeOneInteractionF1NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
#ifdef APPLY_SCALE
float
dScale
,
float
pScale
,
float
mScale
,
#endif
real3
&
force
,
real
&
energy
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
real
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
real
ci
=
atom1
.
q
;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
qi1
=
atom1
.
quadrupoleXX
;
real
qi2
=
atom1
.
quadrupoleXY
;
real
qi3
=
atom1
.
quadrupoleXZ
;
real
qi5
=
atom1
.
quadrupoleYY
;
real
qi6
=
atom1
.
quadrupoleYZ
;
real
qi9
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
);
real
ck
=
atom2
.
q
;
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
qk1
=
atom2
.
quadrupoleXX
;
real
qk2
=
atom2
.
quadrupoleXY
;
real
qk3
=
atom2
.
quadrupoleXZ
;
real
qk5
=
atom2
.
quadrupoleYY
;
real
qk6
=
atom2
.
quadrupoleYZ
;
real
qk9
=
-
(
atom2
.
quadrupoleXX
+
atom2
.
quadrupoleYY
);
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
#ifdef APPLY_SCALE
real
offset
=
1
-
mScale
;
real
rr3
=
rr1
*
rr1
*
rr1
;
real
gf4
=
2
*
(
bn2
-
3
*
offset
*
rr3
*
rr1
*
rr1
);
#else
real
gf4
=
2
*
bn2
;
#endif
real
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
real
qkdi1
=
qk1
*
di1
+
qk2
*
di2
+
qk3
*
di3
;
real
ftm21
=
gf4
*
(
qkdi1
-
qidk1
);
real
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
real
qkdi2
=
qk2
*
di1
+
qk5
*
di2
+
qk6
*
di3
;
real
ftm22
=
gf4
*
(
qkdi2
-
qidk2
);
real
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
real
qkdi3
=
qk3
*
di1
+
qk6
*
di2
+
qk9
*
di3
;
real
ftm23
=
gf4
*
(
qkdi3
-
qidk3
);
real
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
real
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
real
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
real
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
real
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
real
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
#ifdef APPLY_SCALE
real
gf7
=
4
*
(
bn3
-
15
*
offset
*
rr3
*
rr3
*
rr1
);
#else
real
gf7
=
4
*
bn3
;
#endif
real
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
real
qkqir1
=
qk1
*
qir1
+
qk2
*
qir2
+
qk3
*
qir3
;
ftm21
+=
gf7
*
(
qiqkr1
+
qkqir1
);
real
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
real
qkqir2
=
qk2
*
qir1
+
qk5
*
qir2
+
qk6
*
qir3
;
ftm22
+=
gf7
*
(
qiqkr2
+
qkqir2
);
real
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
real
qkqir3
=
qk3
*
qir1
+
qk6
*
qir2
+
qk9
*
qir3
;
ftm23
+=
gf7
*
(
qiqkr3
+
qkqir3
);
// calculate the scalar products for permanent components
real
gl6
=
di1
*
dk1
+
di2
*
dk2
+
di3
*
dk3
;
real
gl7
=
2
*
(
qir1
*
dk1
+
qir2
*
dk2
+
qir3
*
dk3
-
(
qkr1
*
di1
+
qkr2
*
di2
+
qkr3
*
di3
));
real
gl5
=
-
4
*
(
qir1
*
qkr1
+
qir2
*
qkr2
+
qir3
*
qkr3
);
real
gl8
=
2
*
(
qi1
*
qk1
+
qi2
*
qk2
+
qi3
*
qk3
+
qi2
*
qk2
+
qi5
*
qk5
+
qi6
*
qk6
+
qi3
*
qk3
+
qi6
*
qk6
+
qi9
*
qk9
);
real
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
real
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
real
gl0
=
ci
*
ck
;
real
gl1
=
ck
*
sc3
-
ci
*
sc4
;
real
gl2
=
ci
*
sc6
+
ck
*
sc5
-
sc3
*
sc4
;
real
gl3
=
sc3
*
sc6
-
sc4
*
sc5
;
real
gl4
=
sc5
*
sc6
;
#ifdef APPLY_SCALE
energy
+=
forceFactor
*
(
-
offset
*
rr1
*
gl0
+
(
bn1
-
offset
*
rr3
)
*
(
gl1
+
gl6
)
+
(
bn2
-
offset
*
(
3
*
rr3
*
rr1
*
rr1
))
*
(
gl2
+
gl7
+
gl8
)
+
(
bn3
-
offset
*
(
15
*
rr3
*
rr3
*
rr1
))
*
(
gl3
+
gl5
)
+
(
bn4
-
offset
*
(
105
*
rr3
*
rr3
*
rr3
))
*
gl4
);
#else
energy
+=
forceFactor
*
(
bn1
*
(
gl1
+
gl6
)
+
bn2
*
(
gl2
+
gl7
+
gl8
)
+
bn3
*
(
gl3
+
gl5
)
+
bn4
*
gl4
);
#endif
real
gf1
=
bn1
*
gl0
+
bn2
*
(
gl1
+
gl6
)
+
bn3
*
(
gl2
+
gl7
+
gl8
)
+
bn4
*
(
gl3
+
gl5
)
+
bn5
*
gl4
;
#ifdef APPLY_SCALE
gf1
-=
offset
*
(
rr3
*
gl0
+
(
3
*
rr3
*
rr1
*
rr1
)
*
(
gl1
+
gl6
)
+
(
15
*
rr3
*
rr3
*
rr1
)
*
(
gl2
+
gl7
+
gl8
)
+
(
105
*
rr3
*
rr3
*
rr3
)
*
(
gl3
+
gl5
)
+
(
945
*
rr3
*
rr3
*
rr3
*
rr1
*
rr1
)
*
gl4
);
#endif
ftm21
+=
gf1
*
xr
;
ftm22
+=
gf1
*
yr
;
ftm23
+=
gf1
*
zr
;
#ifdef APPLY_SCALE
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
-
offset
*
(
-
ck
*
rr3
+
sc4
*
(
3
*
rr3
*
rr1
*
rr1
)
-
sc6
*
(
15
*
rr3
*
rr3
*
rr1
));
#else
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
#endif
ftm21
+=
gf2
*
di1
;
ftm22
+=
gf2
*
di2
;
ftm23
+=
gf2
*
di3
;
#ifdef APPLY_SCALE
real
gf5
=
2
*
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
-
offset
*
(
-
ck
*
(
3
*
rr3
*
rr1
*
rr1
)
+
sc4
*
(
15
*
rr3
*
rr3
*
rr1
)
-
sc6
*
(
105
*
rr3
*
rr3
*
rr3
)));
#else
real
gf5
=
2
*
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
);
#endif
ftm21
+=
gf5
*
qir1
;
ftm22
+=
gf5
*
qir2
;
ftm23
+=
gf5
*
qir3
;
#ifdef APPLY_SCALE
real
gf3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
-
offset
*
(
ci
*
rr3
+
sc3
*
(
3
*
rr3
*
rr1
*
rr1
)
+
sc5
*
(
15
*
rr3
*
rr3
*
rr1
));
#else
real
gf3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
#endif
ftm21
+=
gf3
*
dk1
;
ftm22
+=
gf3
*
dk2
;
ftm23
+=
gf3
*
dk3
;
#ifdef APPLY_SCALE
real
gf6
=
2
*
(
-
ci
*
bn2
-
sc3
*
bn3
-
sc5
*
bn4
-
offset
*
(
-
ci
*
(
3
*
rr3
*
rr1
*
rr1
)
-
sc3
*
(
15
*
rr3
*
rr3
*
rr1
)
-
sc5
*
(
105
*
rr3
*
rr3
*
rr3
)));
#else
real
gf6
=
2
*
(
-
ci
*
bn2
-
sc3
*
bn3
-
sc5
*
bn4
);
#endif
ftm21
+=
gf6
*
qkr1
;
ftm22
+=
gf6
*
qkr2
;
ftm23
+=
gf6
*
qkr3
;
force
.
x
=
ftm21
;
force
.
y
=
ftm22
;
force
.
z
=
ftm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionF2
(
#else
computeOneInteractionF2NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
#ifdef APPLY_SCALE
float
dScale
,
float
pScale
,
float
mScale
,
#endif
real3
&
force
,
real
&
energy
)
{
const
float
uScale
=
1
;
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
real
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
real
ci
=
atom1
.
q
;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
qi1
=
atom1
.
quadrupoleXX
;
real
qi2
=
atom1
.
quadrupoleXY
;
real
qi3
=
atom1
.
quadrupoleXZ
;
real
qi5
=
atom1
.
quadrupoleYY
;
real
qi6
=
atom1
.
quadrupoleYZ
;
real
qi9
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
);
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
real
damp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
damp
!=
0
)
{
real
pgamma
=
atom1
.
thole
<
atom2
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
ratio
=
RECIP
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
}
real
scale5
=
(
damp
==
0
)
?
1
:
(
1
-
(
1
-
damp
)
*
EXP
(
damp
));
real
rr5
=
rr1
*
rr1
;
rr5
=
3
*
rr1
*
rr5
*
rr5
;
#ifdef APPLY_SCALE
real
psc5
=
rr5
*
(
1
-
scale5
*
pScale
);
real
dsc5
=
rr5
*
(
1
-
scale5
*
dScale
);
real
usc5
=
rr5
*
(
1
-
scale5
*
uScale
);
#else
real
psc5
=
rr5
*
(
1
-
scale5
);
#endif
real
qiuk1
=
qi1
*
atom2
.
inducedDipole
.
x
+
qi2
*
atom2
.
inducedDipole
.
y
+
qi3
*
atom2
.
inducedDipole
.
z
;
real
qiukp1
=
qi1
*
atom2
.
inducedDipolePolar
.
x
+
qi2
*
atom2
.
inducedDipolePolar
.
y
+
qi3
*
atom2
.
inducedDipolePolar
.
z
;
real
ftm21
=
-
bn2
*
(
qiuk1
+
qiukp1
);
#ifdef APPLY_SCALE
ftm21
+=
qiuk1
*
psc5
+
qiukp1
*
dsc5
;
#else
ftm21
+=
(
qiuk1
+
qiukp1
)
*
psc5
;
#endif
real
qiuk2
=
qi2
*
atom2
.
inducedDipole
.
x
+
qi5
*
atom2
.
inducedDipole
.
y
+
qi6
*
atom2
.
inducedDipole
.
z
;
real
qiukp2
=
qi2
*
atom2
.
inducedDipolePolar
.
x
+
qi5
*
atom2
.
inducedDipolePolar
.
y
+
qi6
*
atom2
.
inducedDipolePolar
.
z
;
real
ftm22
=
-
bn2
*
(
qiuk2
+
qiukp2
);
#ifdef APPLY_SCALE
ftm22
+=
((
qiuk2
)
*
psc5
+
(
qiukp2
)
*
dsc5
);
#else
ftm22
+=
(
qiuk2
+
qiukp2
)
*
psc5
;
#endif
real
qiuk3
=
qi3
*
atom2
.
inducedDipole
.
x
+
qi6
*
atom2
.
inducedDipole
.
y
+
qi9
*
atom2
.
inducedDipole
.
z
;
real
qiukp3
=
qi3
*
atom2
.
inducedDipolePolar
.
x
+
qi6
*
atom2
.
inducedDipolePolar
.
y
+
qi9
*
atom2
.
inducedDipolePolar
.
z
;
real
ftm23
=
-
bn2
*
(
qiuk3
+
qiukp3
);
#ifdef APPLY_SCALE
ftm23
+=
((
qiuk3
)
*
psc5
+
(
qiukp3
)
*
dsc5
);
#else
ftm23
+=
(
qiuk3
+
qiukp3
)
*
psc5
;
#endif
real
expdamp
=
EXP
(
damp
);
real
scale3
=
(
damp
==
0
)
?
1
:
(
1
-
expdamp
);
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
real
psc3
=
rr3
*
(
1
-
scale3
*
pScale
);
real
dsc3
=
rr3
*
(
1
-
scale3
*
dScale
);
real
usc3
=
rr3
*
(
1
-
scale3
*
uScale
);
#else
real
psc3
=
rr3
*
(
1
-
scale3
);
#endif
real
scale7
=
(
damp
==
0
)
?
1
:
(
1
-
(
1
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
);
#ifdef APPLY_SCALE
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
pScale
);
real
dsc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
dScale
);
#else
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
);
#endif
real
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
real
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
real
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
real
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
real
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
real
gfi3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
real
prefactor1
;
prefactor1
=
0.5
f
*
(
ci
*
psc3
+
sc3
*
psc5
+
sc5
*
psc7
-
gfi3
);
ftm21
-=
prefactor1
*
atom2
.
inducedDipole
.
x
;
ftm22
-=
prefactor1
*
atom2
.
inducedDipole
.
y
;
ftm23
-=
prefactor1
*
atom2
.
inducedDipole
.
z
;
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
ci
*
dsc3
+
sc3
*
dsc5
+
sc5
*
dsc7
-
gfi3
);
#endif
ftm21
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
x
;
ftm22
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
y
;
ftm23
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
z
;
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
energy
+=
forceFactor
*
0.5
f
*
sci4
*
((
psc3
-
bn1
)
*
ci
+
(
psc5
-
bn2
)
*
sc3
+
(
psc7
-
bn3
)
*
sc5
);
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
));
ftm22
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
));
ftm23
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
));
#endif
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
*
(
sci4
+
scip4
)
-
(
sci4
*
psc5
+
scip4
*
dsc5
));
#else
sci4
+=
scip4
;
prefactor1
=
0.5
f
*
sci4
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
di1
;
ftm22
+=
prefactor1
*
di2
;
ftm23
+=
prefactor1
*
di3
;
#ifdef APPLY_SCALE
real
gfi5
=
bn3
*
(
sci4
+
scip4
)
-
(
sci4
*
psc7
+
scip4
*
dsc7
);
#else
real
gfi5
=
sci4
*
(
bn3
-
psc7
);
#endif
ftm21
+=
gfi5
*
qir1
;
ftm22
+=
gfi5
*
qir2
;
ftm23
+=
gfi5
*
qir3
;
real
sci7
=
qir1
*
atom2
.
inducedDipole
.
x
+
qir2
*
atom2
.
inducedDipole
.
y
+
qir3
*
atom2
.
inducedDipole
.
z
;
energy
+=
forceFactor
*
(
bn2
-
psc5
)
*
sci7
;
real
scip7
=
qir1
*
atom2
.
inducedDipolePolar
.
x
+
qir2
*
atom2
.
inducedDipolePolar
.
y
+
qir3
*
atom2
.
inducedDipolePolar
.
z
;
#ifdef APPLY_SCALE
real
gli1
=
-
ci
*
sci4
;
real
gli2
=
-
sc3
*
sci4
+
2
*
sci7
;
real
gli3
=
-
sci4
*
sc5
;
real
glip1
=
-
ci
*
scip4
;
real
glip2
=
-
sc3
*
scip4
+
2
*
scip7
;
real
glip3
=
-
scip4
*
sc5
;
#else
real
gli1
=
-
ci
*
sci4
;
real
gli2
=
-
sc3
*
sci4
+
2
*
(
sci7
+
scip7
);
real
gli3
=
-
sci4
*
sc5
;
#endif
#ifdef APPLY_SCALE
real
gfi1
=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
)
+
bn4
*
(
gli3
+
glip3
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
*
(
gli2
*
psc5
+
glip2
*
dsc5
)
+
7
*
(
gli3
*
psc7
+
glip3
*
dsc7
));
#else
real
gfi1
=
bn2
*
gli1
+
bn3
*
gli2
+
bn4
*
gli3
;
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
gli1
*
psc3
+
5
*
gli2
*
psc5
+
7
*
gli3
*
psc7
);
#endif
gfi1
*=
0.5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
{
real
expdamp
=
EXP
(
damp
);
real
temp3
=
-
1.5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
real
temp5
=
-
damp
;
real
temp7
=
-
0.2
f
-
0.6
f
*
damp
;
real
ddsc31
=
temp3
*
xr
;
real
ddsc32
=
temp3
*
yr
;
real
ddsc33
=
temp3
*
zr
;
real
ddsc51
=
temp5
*
ddsc31
;
real
ddsc52
=
temp5
*
ddsc32
;
real
ddsc53
=
temp5
*
ddsc33
;
real
ddsc71
=
temp7
*
ddsc51
;
real
ddsc72
=
temp7
*
ddsc52
;
real
ddsc73
=
temp7
*
ddsc53
;
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
(
gli1
*
pScale
+
glip1
*
dScale
);
temp5
=
(
3
*
rr1
*
rr1
)
*
(
gli2
*
pScale
+
glip2
*
dScale
);
temp7
=
(
15
*
rr3
*
rr1
)
*
(
gli3
*
pScale
+
glip3
*
dScale
);
#else
temp3
=
gli1
;
temp5
=
(
3
*
rr1
*
rr1
)
*
gli2
;
temp7
=
(
15
*
rr3
*
rr1
)
*
gli3
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
);
}
//K
real
qk1
=
atom2
.
quadrupoleXX
;
real
qk2
=
atom2
.
quadrupoleXY
;
real
qk3
=
atom2
.
quadrupoleXZ
;
real
qk5
=
atom2
.
quadrupoleYY
;
real
qk6
=
atom2
.
quadrupoleYZ
;
real
qk9
=
-
(
qk1
+
qk5
);
real
qkui1
=
qk1
*
atom1
.
inducedDipole
.
x
+
qk2
*
atom1
.
inducedDipole
.
y
+
qk3
*
atom1
.
inducedDipole
.
z
;
real
qkuip1
=
qk1
*
atom1
.
inducedDipolePolar
.
x
+
qk2
*
atom1
.
inducedDipolePolar
.
y
+
qk3
*
atom1
.
inducedDipolePolar
.
z
;
ftm21
+=
bn2
*
(
qkui1
+
qkuip1
);
#ifdef APPLY_SCALE
ftm21
-=
(
qkui1
*
psc5
+
qkuip1
*
dsc5
);
#else
ftm21
-=
(
qkui1
+
qkuip1
)
*
psc5
;
#endif
real
qkui2
=
qk2
*
atom1
.
inducedDipole
.
x
+
qk5
*
atom1
.
inducedDipole
.
y
+
qk6
*
atom1
.
inducedDipole
.
z
;
real
qkuip2
=
qk2
*
atom1
.
inducedDipolePolar
.
x
+
qk5
*
atom1
.
inducedDipolePolar
.
y
+
qk6
*
atom1
.
inducedDipolePolar
.
z
;
ftm22
+=
bn2
*
(
qkui2
+
qkuip2
);
#ifdef APPLY_SCALE
ftm22
-=
((
qkui2
)
*
psc5
+
(
qkuip2
)
*
dsc5
);
#else
ftm22
-=
(
qkui2
+
qkuip2
)
*
psc5
;
#endif
real
qkui3
=
qk3
*
atom1
.
inducedDipole
.
x
+
qk6
*
atom1
.
inducedDipole
.
y
+
qk9
*
atom1
.
inducedDipole
.
z
;
real
qkuip3
=
qk3
*
atom1
.
inducedDipolePolar
.
x
+
qk6
*
atom1
.
inducedDipolePolar
.
y
+
qk9
*
atom1
.
inducedDipolePolar
.
z
;
ftm23
+=
bn2
*
(
qkui3
+
qkuip3
);
#ifdef APPLY_SCALE
ftm23
-=
((
qkui3
)
*
psc5
+
(
qkuip3
)
*
dsc5
);
#else
ftm23
-=
(
qkui3
+
qkuip3
)
*
psc5
;
#endif
real
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
real
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
real
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
real
ck
=
atom2
.
q
;
real
gfi2
=
(
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
);
prefactor1
=
0.5
f
*
(
ck
*
psc3
-
sc4
*
psc5
+
sc6
*
psc7
+
gfi2
);
ftm21
+=
prefactor1
*
atom1
.
inducedDipole
.
x
;
ftm22
+=
prefactor1
*
atom1
.
inducedDipole
.
y
;
ftm23
+=
prefactor1
*
atom1
.
inducedDipole
.
z
;
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
ck
*
dsc3
-
sc4
*
dsc5
+
sc6
*
dsc7
+
gfi2
);
#endif
ftm21
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
x
;
ftm22
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
y
;
ftm23
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
z
;
real
sci3
=
atom1
.
inducedDipole
.
x
*
xr
+
atom1
.
inducedDipole
.
y
*
yr
+
atom1
.
inducedDipole
.
z
*
zr
;
energy
+=
forceFactor
*
0.5
f
*
sci3
*
(
ck
*
(
bn1
-
psc3
)
-
sc4
*
(
bn2
-
psc5
)
+
sc6
*
(
bn3
-
psc7
));
real
scip3
=
atom1
.
inducedDipolePolar
.
x
*
xr
+
atom1
.
inducedDipolePolar
.
y
*
yr
+
atom1
.
inducedDipolePolar
.
z
*
zr
;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
);
ftm22
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
);
ftm23
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
real
sci34
;
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
sci34
=
(
sci3
*
scip4
+
scip3
*
sci4
);
#ifdef APPLY_SCALE
gfi1
=
sci34
*
(
usc5
*
(
5
*
rr1
*
rr1
)
-
bn3
);
#else
gfi1
=
sci34
*
(
psc5
*
(
5
*
rr1
*
rr1
)
-
bn3
);
#endif
#else
gfi1
=
0
;
#endif
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
*
(
sci3
+
scip3
)
-
(
sci3
*
psc5
+
scip3
*
dsc5
));
#else
sci3
+=
scip3
;
prefactor1
=
0.5
f
*
sci3
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
dk1
;
ftm22
+=
prefactor1
*
dk2
;
ftm23
+=
prefactor1
*
dk3
;
#ifdef APPLY_SCALE
real
gfi6
=
-
bn3
*
(
sci3
+
scip3
)
+
(
sci3
*
psc7
+
scip3
*
dsc7
);
#else
real
gfi6
=
sci3
*
(
psc7
-
bn3
);
#endif
ftm21
+=
gfi6
*
qkr1
;
ftm22
+=
gfi6
*
qkr2
;
ftm23
+=
gfi6
*
qkr3
;
real
sci1
=
atom1
.
inducedDipole
.
x
*
dk1
+
atom1
.
inducedDipole
.
y
*
dk2
+
atom1
.
inducedDipole
.
z
*
dk3
+
di1
*
atom2
.
inducedDipole
.
x
+
di2
*
atom2
.
inducedDipole
.
y
+
di3
*
atom2
.
inducedDipole
.
z
;
energy
+=
forceFactor
*
0.5
f
*
(
sci1
*
(
bn1
-
psc3
));
real
sci8
=
qkr1
*
atom1
.
inducedDipole
.
x
+
qkr2
*
atom1
.
inducedDipole
.
y
+
qkr3
*
atom1
.
inducedDipole
.
z
;
energy
-=
forceFactor
*
sci8
*
(
bn2
-
psc5
);
real
scip1
=
atom1
.
inducedDipolePolar
.
x
*
dk1
+
atom1
.
inducedDipolePolar
.
y
*
dk2
+
atom1
.
inducedDipolePolar
.
z
*
dk3
+
di1
*
atom2
.
inducedDipolePolar
.
x
+
di2
*
atom2
.
inducedDipolePolar
.
y
+
di3
*
atom2
.
inducedDipolePolar
.
z
;
#ifndef APPLY_SCALE
sci1
+=
scip1
;
#endif
real
scip2
=
atom1
.
inducedDipole
.
x
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
inducedDipolePolar
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
inducedDipolePolar
.
z
;
real
scip8
=
qkr1
*
atom1
.
inducedDipolePolar
.
x
+
qkr2
*
atom1
.
inducedDipolePolar
.
y
+
qkr3
*
atom1
.
inducedDipolePolar
.
z
;
#ifndef APPLY_SCALE
sci8
+=
scip8
;
#endif
gli1
=
ck
*
sci3
+
sci1
;
gli2
=
-
(
sci3
*
sc4
+
2
*
sci8
);
gli3
=
sci3
*
sc6
;
#ifdef APPLY_SCALE
glip1
=
ck
*
scip3
+
scip1
;
glip2
=
-
(
scip3
*
sc4
+
2
*
scip8
);
glip3
=
scip3
*
sc6
;
#endif
#ifdef APPLY_SCALE
gfi1
+=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
)
+
bn4
*
(
gli3
+
glip3
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
*
(
gli2
*
psc5
+
glip2
*
dsc5
)
+
7
*
(
gli3
*
psc7
+
glip3
*
dsc7
));
#else
gfi1
+=
(
bn2
*
gli1
+
bn3
*
gli2
+
bn4
*
gli3
);
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
gli1
*
psc3
+
5
*
gli2
*
psc5
+
7
*
gli3
*
psc7
);
#endif
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
gfi1
+=
scip2
*
(
bn2
-
(
3
*
rr1
*
rr1
)
*
usc3
);
#else
gfi1
+=
scip2
*
(
bn2
-
(
3
*
rr1
*
rr1
)
*
psc3
);
#endif
#endif
gfi1
*=
0.5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
{
real
expdamp
=
EXP
(
damp
);
real
temp3
=
-
1.5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
real
temp5
=
-
damp
;
real
temp7
=
-
0.2
f
-
0.6
f
*
damp
;
real
ddsc31
=
temp3
*
xr
;
real
ddsc32
=
temp3
*
yr
;
real
ddsc33
=
temp3
*
zr
;
real
ddsc51
=
temp5
*
ddsc31
;
real
ddsc52
=
temp5
*
ddsc32
;
real
ddsc53
=
temp5
*
ddsc33
;
real
ddsc71
=
temp7
*
ddsc51
;
real
ddsc72
=
temp7
*
ddsc52
;
real
ddsc73
=
temp7
*
ddsc53
;
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
gli1
*
pScale
+
glip1
*
dScale
;
temp5
=
(
3
*
rr1
*
rr1
)
*
(
gli2
*
pScale
+
glip2
*
dScale
);
temp7
=
(
15
*
rr3
*
rr1
)
*
(
gli3
*
pScale
+
glip3
*
dScale
);
#else
temp3
=
gli1
;
temp5
=
(
3
*
rr1
*
rr1
)
*
gli2
;
temp7
=
(
15
*
rr3
*
rr1
)
*
(
gli3
);
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
);
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
temp3
=
uScale
*
scip2
;
temp5
=
-
(
3
*
rr1
*
rr1
)
*
uScale
*
sci34
;
#else
temp3
=
scip2
;
temp5
=
-
(
3
*
rr1
*
rr1
)
*
sci34
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
#endif
}
force
.
x
+=
ftm21
;
force
.
y
+=
ftm22
;
force
.
z
+=
ftm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionT1
(
#else
computeOneInteractionT1NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
#ifdef APPLY_SCALE
,
float
dScale
,
float
pScale
,
float
mScale
#endif
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
real
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
qi1
=
atom1
.
quadrupoleXX
;
real
qi2
=
atom1
.
quadrupoleXY
;
real
qi3
=
atom1
.
quadrupoleXZ
;
real
qi5
=
atom1
.
quadrupoleYY
;
real
qi6
=
atom1
.
quadrupoleYZ
;
//real qi9 = atom1.labFrameQuadrupole[5];
real
qi9
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
);
real
ck
=
atom2
.
q
;
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
qk1
=
atom2
.
quadrupoleXX
;
real
qk2
=
atom2
.
quadrupoleXY
;
real
qk3
=
atom2
.
quadrupoleXZ
;
real
qk5
=
atom2
.
quadrupoleYY
;
real
qk6
=
atom2
.
quadrupoleYZ
;
//real qk9 = atom2.labFrameQuadrupole[5];
real
qk9
=
-
(
atom2
.
quadrupoleXX
+
atom2
.
quadrupoleYY
);
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
real
rr2
=
rr1
*
rr1
;
real
rr3
=
rr1
*
rr2
;
real
rr5
=
3
*
rr3
*
rr2
;
real
rr7
=
5
*
rr5
*
rr2
;
real
rr9
=
7
*
rr7
*
rr2
;
real
scale
=
1
-
mScale
;
real
prefactor
=
scale
*
rr3
-
bn1
;
#else
real
prefactor
=
-
bn1
;
#endif
real
dixdk1
=
di2
*
dk3
-
di3
*
dk2
;
real
ttm21
=
prefactor
*
dixdk1
;
real
dixdk2
=
di3
*
dk1
-
di1
*
dk3
;
real
ttm22
=
prefactor
*
dixdk2
;
real
dixdk3
=
di1
*
dk2
-
di2
*
dk1
;
real
ttm23
=
prefactor
*
dixdk3
;
real
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
real
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
real
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
real
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
real
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
real
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
real
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
real
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
real
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
real
rxqikr1
=
yr
*
qiqkr3
-
zr
*
qiqkr2
;
real
qkrxqir1
=
qkr2
*
qir3
-
qkr3
*
qir2
;
#ifdef APPLY_SCALE
prefactor
=
4
*
(
bn3
-
scale
*
rr7
);
#else
prefactor
=
4
*
bn3
;
#endif
ttm21
-=
prefactor
*
(
rxqikr1
+
qkrxqir1
);
real
rxqikr2
=
zr
*
qiqkr1
-
xr
*
qiqkr3
;
real
qkrxqir2
=
qkr3
*
qir1
-
qkr1
*
qir3
;
ttm22
-=
prefactor
*
(
rxqikr2
+
qkrxqir2
);
real
rxqikr3
=
xr
*
qiqkr2
-
yr
*
qiqkr1
;
real
qkrxqir3
=
qkr1
*
qir2
-
qkr2
*
qir1
;
ttm23
-=
prefactor
*
(
rxqikr3
+
qkrxqir3
);
real
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
real
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
real
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
real
dixqkr1
=
di2
*
qkr3
-
di3
*
qkr2
;
real
dkxqir1
=
dk2
*
qir3
-
dk3
*
qir2
;
real
rxqidk1
=
yr
*
qidk3
-
zr
*
qidk2
;
real
qixqk1
=
qi2
*
qk3
+
qi5
*
qk6
+
qi6
*
qk9
-
qi3
*
qk2
-
qi6
*
qk5
-
qi9
*
qk6
;
#ifdef APPLY_SCALE
prefactor
=
2
*
(
bn2
-
scale
*
rr5
);
#else
prefactor
=
2
*
bn2
;
#endif
ttm21
+=
prefactor
*
(
dixqkr1
+
dkxqir1
+
rxqidk1
-
2
*
qixqk1
);
real
dixqkr2
=
di3
*
qkr1
-
di1
*
qkr3
;
real
dkxqir2
=
dk3
*
qir1
-
dk1
*
qir3
;
real
rxqidk2
=
zr
*
qidk1
-
xr
*
qidk3
;
real
qixqk2
=
qi3
*
qk1
+
qi6
*
qk2
+
qi9
*
qk3
-
qi1
*
qk3
-
qi2
*
qk6
-
qi3
*
qk9
;
ttm22
+=
prefactor
*
(
dixqkr2
+
dkxqir2
+
rxqidk2
-
2
*
qixqk2
);
real
dixqkr3
=
di1
*
qkr2
-
di2
*
qkr1
;
real
dkxqir3
=
dk1
*
qir2
-
dk2
*
qir1
;
real
rxqidk3
=
xr
*
qidk2
-
yr
*
qidk1
;
real
qixqk3
=
qi1
*
qk2
+
qi2
*
qk5
+
qi3
*
qk6
-
qi2
*
qk1
-
qi5
*
qk2
-
qi6
*
qk3
;
ttm23
+=
prefactor
*
(
dixqkr3
+
dkxqir3
+
rxqidk3
-
2
*
qixqk3
);
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
#ifdef APPLY_SCALE
real
gfr2
=
-
ck
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
;
prefactor
=
(
gf2
-
scale
*
gfr2
);
#else
prefactor
=
gf2
;
#endif
ttm21
+=
prefactor
*
(
di2
*
zr
-
di3
*
yr
);
ttm22
+=
prefactor
*
(
di3
*
xr
-
di1
*
zr
);
ttm23
+=
prefactor
*
(
di1
*
yr
-
di2
*
xr
);
real
gf5
=
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
);
#ifdef APPLY_SCALE
real
gfr5
=
(
-
ck
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
);
prefactor
=
2
*
(
gf5
-
scale
*
gfr5
);
#else
prefactor
=
2
*
gf5
;
#endif
real
rxqir1
=
yr
*
qir3
-
zr
*
qir2
;
real
rxqir2
=
zr
*
qir1
-
xr
*
qir3
;
real
rxqir3
=
xr
*
qir2
-
yr
*
qir1
;
ttm21
-=
prefactor
*
rxqir1
;
ttm22
-=
prefactor
*
rxqir2
;
ttm23
-=
prefactor
*
rxqir3
;
atom1
.
torque
.
x
+=
ttm21
;
atom1
.
torque
.
y
+=
ttm22
;
atom1
.
torque
.
z
+=
ttm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionT2
(
#else
computeOneInteractionT2NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
#ifdef APPLY_SCALE
,
float
dScale
,
float
pScale
,
float
mScale
#endif
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
real
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
qi1
=
atom1
.
quadrupoleXX
;
real
qi2
=
atom1
.
quadrupoleXY
;
real
qi3
=
atom1
.
quadrupoleXZ
;
real
qi5
=
atom1
.
quadrupoleYY
;
real
qi6
=
atom1
.
quadrupoleYZ
;
real
qi9
=
-
(
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
);
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
// apply Thole polarization damping to scale factors
real
scale3
=
1
;
real
scale5
=
1
;
real
scale7
=
1
;
real
damp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
damp
!=
0
)
{
real
pgamma
=
atom1
.
thole
<
atom2
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
ratio
=
RECIP
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
real
expdamp
=
EXP
(
damp
);
scale3
=
1
-
expdamp
;
scale5
=
1
-
(
1
-
damp
)
*
expdamp
;
scale7
=
1
-
(
1
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
;
}
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
real
dsc3
=
rr3
*
(
1
-
scale3
*
dScale
);
real
dsc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
*
dScale
);
real
dsc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
dScale
);
real
psc3
=
rr3
*
(
1
-
scale3
*
pScale
);
real
psc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
*
pScale
);
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
pScale
);
#else
real
psc3
=
rr3
*
(
1
-
scale3
);
real
psc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
);
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
);
#endif
real
prefactor1
=
0.5
f
*
(
psc3
-
bn1
);
#ifdef APPLY_SCALE
real
prefactor2
=
0.5
f
*
(
dsc3
-
bn1
);
#endif
real
dixuk1
=
di2
*
atom2
.
inducedDipole
.
z
-
di3
*
atom2
.
inducedDipole
.
y
;
real
dixukp1
=
di2
*
atom2
.
inducedDipolePolar
.
z
-
di3
*
atom2
.
inducedDipolePolar
.
y
;
#ifdef APPLY_SCALE
real
ttm2i1
=
prefactor1
*
dixuk1
+
prefactor2
*
dixukp1
;
#else
real
ttm2i1
=
prefactor1
*
(
dixuk1
+
dixukp1
);
#endif
real
dixuk2
=
di3
*
atom2
.
inducedDipole
.
x
-
di1
*
atom2
.
inducedDipole
.
z
;
real
dixukp2
=
di3
*
atom2
.
inducedDipolePolar
.
x
-
di1
*
atom2
.
inducedDipolePolar
.
z
;
#ifdef APPLY_SCALE
real
ttm2i2
=
prefactor1
*
dixuk2
+
prefactor2
*
dixukp2
;
#else
real
ttm2i2
=
prefactor1
*
(
dixuk2
+
dixukp2
);
#endif
real
dixuk3
=
di1
*
atom2
.
inducedDipole
.
y
-
di2
*
atom2
.
inducedDipole
.
x
;
real
dixukp3
=
di1
*
atom2
.
inducedDipolePolar
.
y
-
di2
*
atom2
.
inducedDipolePolar
.
x
;
#ifdef APPLY_SCALE
real
ttm2i3
=
prefactor1
*
dixuk3
+
prefactor2
*
dixukp3
;
#else
real
ttm2i3
=
prefactor1
*
(
dixuk3
+
dixukp3
);
#endif
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
real
gti2
=
bn2
*
(
sci4
+
scip4
);
#ifdef APPLY_SCALE
real
gtri2
=
(
sci4
*
psc5
+
scip4
*
dsc5
);
#else
real
gtri2
=
psc5
*
(
sci4
+
scip4
);
#endif
prefactor1
=
0.5
f
*
(
gti2
-
gtri2
);
ttm2i1
+=
prefactor1
*
(
di2
*
zr
-
di3
*
yr
);
ttm2i2
+=
prefactor1
*
(
di3
*
xr
-
di1
*
zr
);
ttm2i3
+=
prefactor1
*
(
di1
*
yr
-
di2
*
xr
);
real
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
real
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
real
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
#ifdef APPLY_SCALE
prefactor1
=
sci4
*
psc7
+
scip4
*
dsc7
-
bn3
*
(
sci4
+
scip4
);
#else
prefactor1
=
psc7
*
(
sci4
+
scip4
)
-
bn3
*
(
sci4
+
scip4
);
#endif
ttm2i1
+=
prefactor1
*
(
yr
*
qir3
-
zr
*
qir2
);
ttm2i2
+=
prefactor1
*
(
zr
*
qir1
-
xr
*
qir3
);
ttm2i3
+=
prefactor1
*
(
xr
*
qir2
-
yr
*
qir1
);
real
qiuk1
=
qi1
*
atom2
.
inducedDipole
.
x
+
qi2
*
atom2
.
inducedDipole
.
y
+
qi3
*
atom2
.
inducedDipole
.
z
;
real
qiuk2
=
qi2
*
atom2
.
inducedDipole
.
x
+
qi5
*
atom2
.
inducedDipole
.
y
+
qi6
*
atom2
.
inducedDipole
.
z
;
real
qiuk3
=
qi3
*
atom2
.
inducedDipole
.
x
+
qi6
*
atom2
.
inducedDipole
.
y
+
qi9
*
atom2
.
inducedDipole
.
z
;
real
qiukp1
=
qi1
*
atom2
.
inducedDipolePolar
.
x
+
qi2
*
atom2
.
inducedDipolePolar
.
y
+
qi3
*
atom2
.
inducedDipolePolar
.
z
;
real
qiukp2
=
qi2
*
atom2
.
inducedDipolePolar
.
x
+
qi5
*
atom2
.
inducedDipolePolar
.
y
+
qi6
*
atom2
.
inducedDipolePolar
.
z
;
real
qiukp3
=
qi3
*
atom2
.
inducedDipolePolar
.
x
+
qi6
*
atom2
.
inducedDipolePolar
.
y
+
qi9
*
atom2
.
inducedDipolePolar
.
z
;
prefactor1
=
(
bn2
-
psc5
);
#ifdef APPLY_SCALE
prefactor2
=
(
bn2
-
dsc5
);
#endif
real
ukxqir1
=
atom2
.
inducedDipole
.
y
*
qir3
-
atom2
.
inducedDipole
.
z
*
qir2
;
real
ukxqirp1
=
atom2
.
inducedDipolePolar
.
y
*
qir3
-
atom2
.
inducedDipolePolar
.
z
*
qir2
;
real
rxqiuk1
=
yr
*
qiuk3
-
zr
*
qiuk2
;
real
rxqiukp1
=
yr
*
qiukp3
-
zr
*
qiukp2
;
#ifdef APPLY_SCALE
ttm2i1
+=
prefactor1
*
(
ukxqir1
+
rxqiuk1
)
+
prefactor2
*
(
ukxqirp1
+
rxqiukp1
);
#else
ttm2i1
+=
prefactor1
*
(
ukxqir1
+
rxqiuk1
+
ukxqirp1
+
rxqiukp1
);
#endif
real
ukxqir2
=
atom2
.
inducedDipole
.
z
*
qir1
-
atom2
.
inducedDipole
.
x
*
qir3
;
real
ukxqirp2
=
atom2
.
inducedDipolePolar
.
z
*
qir1
-
atom2
.
inducedDipolePolar
.
x
*
qir3
;
real
rxqiuk2
=
zr
*
qiuk1
-
xr
*
qiuk3
;
real
rxqiukp2
=
zr
*
qiukp1
-
xr
*
qiukp3
;
#ifdef APPLY_SCALE
ttm2i2
+=
prefactor1
*
(
ukxqir2
+
rxqiuk2
)
+
prefactor2
*
(
ukxqirp2
+
rxqiukp2
);
#else
ttm2i2
+=
prefactor1
*
(
ukxqir2
+
rxqiuk2
+
ukxqirp2
+
rxqiukp2
);
#endif
real
ukxqir3
=
atom2
.
inducedDipole
.
x
*
qir2
-
atom2
.
inducedDipole
.
y
*
qir1
;
real
ukxqirp3
=
atom2
.
inducedDipolePolar
.
x
*
qir2
-
atom2
.
inducedDipolePolar
.
y
*
qir1
;
real
rxqiuk3
=
xr
*
qiuk2
-
yr
*
qiuk1
;
real
rxqiukp3
=
xr
*
qiukp2
-
yr
*
qiukp1
;
#ifdef APPLY_SCALE
ttm2i3
+=
prefactor1
*
(
ukxqir3
+
rxqiuk3
)
+
prefactor2
*
(
ukxqirp3
+
rxqiukp3
);
#else
ttm2i3
+=
prefactor1
*
(
ukxqir3
+
rxqiuk3
+
ukxqirp3
+
rxqiukp3
);
#endif
atom1
.
torque
.
x
+=
ttm2i1
;
atom1
.
torque
.
y
+=
ttm2i2
;
atom1
.
torque
.
z
+=
ttm2i3
;
}
plugins/amoeba/platforms/cuda/src/kernels/pmeElectrostaticPairForceNoQuadrupoles.cu
deleted
100644 → 0
View file @
4c00b312
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionF1
(
#else
computeOneInteractionF1NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
#ifdef APPLY_SCALE
float
dScale
,
float
pScale
,
float
mScale
,
#endif
real3
&
force
,
real
&
energy
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
real
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
real
ci
=
atom1
.
q
;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
ck
=
atom2
.
q
;
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
#ifdef APPLY_SCALE
real
offset
=
1
-
mScale
;
real
rr3
=
rr1
*
rr1
*
rr1
;
real
gf4
=
2
*
(
bn2
-
3
*
offset
*
rr3
*
rr1
*
rr1
);
#else
real
gf4
=
2
*
bn2
;
#endif
real
ftm21
=
0
;
real
ftm22
=
0
;
real
ftm23
=
0
;
// calculate the scalar products for permanent components
real
gl6
=
di1
*
dk1
+
di2
*
dk2
+
di3
*
dk3
;
real
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
gl0
=
ci
*
ck
;
real
gl1
=
ck
*
sc3
-
ci
*
sc4
;
real
gl2
=
-
sc3
*
sc4
;
#ifdef APPLY_SCALE
energy
+=
forceFactor
*
(
-
offset
*
rr1
*
gl0
+
(
bn1
-
offset
*
rr3
)
*
(
gl1
+
gl6
)
+
(
bn2
-
offset
*
(
3
*
rr3
*
rr1
*
rr1
))
*
gl2
);
#else
energy
+=
forceFactor
*
(
bn1
*
(
gl1
+
gl6
)
+
bn2
*
gl2
);
#endif
real
gf1
=
bn1
*
gl0
+
bn2
*
(
gl1
+
gl6
)
+
bn3
*
gl2
;
#ifdef APPLY_SCALE
gf1
-=
offset
*
(
rr3
*
gl0
+
(
3
*
rr3
*
rr1
*
rr1
)
*
(
gl1
+
gl6
)
+
(
15
*
rr3
*
rr3
*
rr1
)
*
gl2
);
#endif
ftm21
+=
gf1
*
xr
;
ftm22
+=
gf1
*
yr
;
ftm23
+=
gf1
*
zr
;
#ifdef APPLY_SCALE
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
offset
*
(
-
ck
*
rr3
+
sc4
*
(
3
*
rr3
*
rr1
*
rr1
));
#else
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
;
#endif
ftm21
+=
gf2
*
di1
;
ftm22
+=
gf2
*
di2
;
ftm23
+=
gf2
*
di3
;
#ifdef APPLY_SCALE
real
gf3
=
ci
*
bn1
+
sc3
*
bn2
-
offset
*
(
ci
*
rr3
+
sc3
*
(
3
*
rr3
*
rr1
*
rr1
));
#else
real
gf3
=
ci
*
bn1
+
sc3
*
bn2
;
#endif
ftm21
+=
gf3
*
dk1
;
ftm22
+=
gf3
*
dk2
;
ftm23
+=
gf3
*
dk3
;
force
.
x
=
ftm21
;
force
.
y
=
ftm22
;
force
.
z
=
ftm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionF2
(
#else
computeOneInteractionF2NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
#ifdef APPLY_SCALE
float
dScale
,
float
pScale
,
float
mScale
,
#endif
real3
&
force
,
real
&
energy
)
{
const
float
uScale
=
1
;
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
real
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
real
ci
=
atom1
.
q
;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
real
damp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
damp
!=
0
)
{
real
pgamma
=
atom1
.
thole
<
atom2
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
ratio
=
RECIP
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
}
real
scale5
=
(
damp
==
0
)
?
1
:
(
1
-
(
1
-
damp
)
*
EXP
(
damp
));
real
rr5
=
rr1
*
rr1
;
rr5
=
3
*
rr1
*
rr5
*
rr5
;
#ifdef APPLY_SCALE
real
psc5
=
rr5
*
(
1
-
scale5
*
pScale
);
real
dsc5
=
rr5
*
(
1
-
scale5
*
dScale
);
real
usc5
=
rr5
*
(
1
-
scale5
*
uScale
);
#else
real
psc5
=
rr5
*
(
1
-
scale5
);
#endif
real
ftm21
=
0
;
real
ftm22
=
0
;
real
ftm23
=
0
;
real
expdamp
=
EXP
(
damp
);
real
scale3
=
(
damp
==
0
)
?
1
:
(
1
-
expdamp
);
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
real
psc3
=
rr3
*
(
1
-
scale3
*
pScale
);
real
dsc3
=
rr3
*
(
1
-
scale3
*
dScale
);
real
usc3
=
rr3
*
(
1
-
scale3
*
uScale
);
#else
real
psc3
=
rr3
*
(
1
-
scale3
);
#endif
real
scale7
=
(
damp
==
0
)
?
1
:
(
1
-
(
1
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
);
#ifdef APPLY_SCALE
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
pScale
);
real
dsc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
dScale
);
#else
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
);
#endif
real
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
real
gfi3
=
ci
*
bn1
+
sc3
*
bn2
;
real
prefactor1
;
prefactor1
=
0.5
f
*
(
ci
*
psc3
+
sc3
*
psc5
-
gfi3
);
ftm21
-=
prefactor1
*
atom2
.
inducedDipole
.
x
;
ftm22
-=
prefactor1
*
atom2
.
inducedDipole
.
y
;
ftm23
-=
prefactor1
*
atom2
.
inducedDipole
.
z
;
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
ci
*
dsc3
+
sc3
*
dsc5
-
gfi3
);
#endif
ftm21
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
x
;
ftm22
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
y
;
ftm23
-=
prefactor1
*
atom2
.
inducedDipolePolar
.
z
;
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
energy
+=
forceFactor
*
0.5
f
*
sci4
*
((
psc3
-
bn1
)
*
ci
+
(
psc5
-
bn2
)
*
sc3
);
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
));
ftm22
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
));
ftm23
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
));
#endif
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
*
(
sci4
+
scip4
)
-
(
sci4
*
psc5
+
scip4
*
dsc5
));
#else
sci4
+=
scip4
;
prefactor1
=
0.5
f
*
sci4
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
di1
;
ftm22
+=
prefactor1
*
di2
;
ftm23
+=
prefactor1
*
di3
;
#ifdef APPLY_SCALE
real
gli1
=
-
ci
*
sci4
;
real
gli2
=
-
sc3
*
sci4
;
real
glip1
=
-
ci
*
scip4
;
real
glip2
=
-
sc3
*
scip4
;
#else
real
gli1
=
-
ci
*
sci4
;
real
gli2
=
-
sc3
*
sci4
;
#endif
#ifdef APPLY_SCALE
real
gfi1
=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
*
(
gli2
*
psc5
+
glip2
*
dsc5
));
#else
real
gfi1
=
bn2
*
gli1
+
bn3
*
gli2
;
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
gli1
*
psc3
+
5
*
gli2
*
psc5
);
#endif
gfi1
*=
0.5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
{
real
expdamp
=
EXP
(
damp
);
real
temp3
=
-
1.5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
real
temp5
=
-
damp
;
real
temp7
=
-
0.2
f
-
0.6
f
*
damp
;
real
ddsc31
=
temp3
*
xr
;
real
ddsc32
=
temp3
*
yr
;
real
ddsc33
=
temp3
*
zr
;
real
ddsc51
=
temp5
*
ddsc31
;
real
ddsc52
=
temp5
*
ddsc32
;
real
ddsc53
=
temp5
*
ddsc33
;
real
ddsc71
=
temp7
*
ddsc51
;
real
ddsc72
=
temp7
*
ddsc52
;
real
ddsc73
=
temp7
*
ddsc53
;
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
(
gli1
*
pScale
+
glip1
*
dScale
);
temp5
=
(
3
*
rr1
*
rr1
)
*
(
gli2
*
pScale
+
glip2
*
dScale
);
#else
temp3
=
gli1
;
temp5
=
(
3
*
rr1
*
rr1
)
*
gli2
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
}
//K
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
ck
=
atom2
.
q
;
real
gfi2
=
(
-
ck
*
bn1
+
sc4
*
bn2
);
prefactor1
=
0.5
f
*
(
ck
*
psc3
-
sc4
*
psc5
+
gfi2
);
ftm21
+=
prefactor1
*
atom1
.
inducedDipole
.
x
;
ftm22
+=
prefactor1
*
atom1
.
inducedDipole
.
y
;
ftm23
+=
prefactor1
*
atom1
.
inducedDipole
.
z
;
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
ck
*
dsc3
-
sc4
*
dsc5
+
gfi2
);
#endif
ftm21
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
x
;
ftm22
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
y
;
ftm23
+=
prefactor1
*
atom1
.
inducedDipolePolar
.
z
;
real
sci3
=
atom1
.
inducedDipole
.
x
*
xr
+
atom1
.
inducedDipole
.
y
*
yr
+
atom1
.
inducedDipole
.
z
*
zr
;
energy
+=
forceFactor
*
0.5
f
*
sci3
*
(
ck
*
(
bn1
-
psc3
)
-
sc4
*
(
bn2
-
psc5
));
real
scip3
=
atom1
.
inducedDipolePolar
.
x
*
xr
+
atom1
.
inducedDipolePolar
.
y
*
yr
+
atom1
.
inducedDipolePolar
.
z
*
zr
;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
);
ftm22
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
);
ftm23
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
real
sci34
;
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
sci34
=
(
sci3
*
scip4
+
scip3
*
sci4
);
#ifdef APPLY_SCALE
gfi1
=
sci34
*
(
usc5
*
(
5
*
rr1
*
rr1
)
-
bn3
);
#else
gfi1
=
sci34
*
(
psc5
*
(
5
*
rr1
*
rr1
)
-
bn3
);
#endif
#else
gfi1
=
0
;
#endif
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
*
(
sci3
+
scip3
)
-
(
sci3
*
psc5
+
scip3
*
dsc5
));
#else
sci3
+=
scip3
;
prefactor1
=
0.5
f
*
sci3
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
dk1
;
ftm22
+=
prefactor1
*
dk2
;
ftm23
+=
prefactor1
*
dk3
;
#ifdef APPLY_SCALE
real
gfi6
=
-
bn3
*
(
sci3
+
scip3
)
+
(
sci3
*
psc7
+
scip3
*
dsc7
);
#else
real
gfi6
=
sci3
*
(
psc7
-
bn3
);
#endif
real
sci1
=
atom1
.
inducedDipole
.
x
*
dk1
+
atom1
.
inducedDipole
.
y
*
dk2
+
atom1
.
inducedDipole
.
z
*
dk3
+
di1
*
atom2
.
inducedDipole
.
x
+
di2
*
atom2
.
inducedDipole
.
y
+
di3
*
atom2
.
inducedDipole
.
z
;
energy
+=
forceFactor
*
0.5
f
*
(
sci1
*
(
bn1
-
psc3
));
real
scip1
=
atom1
.
inducedDipolePolar
.
x
*
dk1
+
atom1
.
inducedDipolePolar
.
y
*
dk2
+
atom1
.
inducedDipolePolar
.
z
*
dk3
+
di1
*
atom2
.
inducedDipolePolar
.
x
+
di2
*
atom2
.
inducedDipolePolar
.
y
+
di3
*
atom2
.
inducedDipolePolar
.
z
;
#ifndef APPLY_SCALE
sci1
+=
scip1
;
#endif
real
scip2
=
atom1
.
inducedDipole
.
x
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
inducedDipolePolar
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
inducedDipolePolar
.
z
;
gli1
=
ck
*
sci3
+
sci1
;
gli2
=
-
sci3
*
sc4
;
#ifdef APPLY_SCALE
glip1
=
ck
*
scip3
+
scip1
;
glip2
=
-
scip3
*
sc4
;
#endif
#ifdef APPLY_SCALE
gfi1
+=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
*
(
gli2
*
psc5
+
glip2
*
dsc5
));
#else
gfi1
+=
(
bn2
*
gli1
+
bn3
*
gli2
);
gfi1
-=
(
rr1
*
rr1
)
*
(
3
*
gli1
*
psc3
+
5
*
gli2
*
psc5
);
#endif
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
gfi1
+=
scip2
*
(
bn2
-
(
3
*
rr1
*
rr1
)
*
usc3
);
#else
gfi1
+=
scip2
*
(
bn2
-
(
3
*
rr1
*
rr1
)
*
psc3
);
#endif
#endif
gfi1
*=
0.5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
{
real
expdamp
=
EXP
(
damp
);
real
temp3
=
-
1.5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
real
temp5
=
-
damp
;
real
temp7
=
-
0.2
f
-
0.6
f
*
damp
;
real
ddsc31
=
temp3
*
xr
;
real
ddsc32
=
temp3
*
yr
;
real
ddsc33
=
temp3
*
zr
;
real
ddsc51
=
temp5
*
ddsc31
;
real
ddsc52
=
temp5
*
ddsc32
;
real
ddsc53
=
temp5
*
ddsc33
;
real
ddsc71
=
temp7
*
ddsc51
;
real
ddsc72
=
temp7
*
ddsc52
;
real
ddsc73
=
temp7
*
ddsc53
;
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
gli1
*
pScale
+
glip1
*
dScale
;
temp5
=
(
3
*
rr1
*
rr1
)
*
(
gli2
*
pScale
+
glip2
*
dScale
);
#else
temp3
=
gli1
;
temp5
=
(
3
*
rr1
*
rr1
)
*
gli2
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
temp3
=
uScale
*
scip2
;
temp5
=
-
(
3
*
rr1
*
rr1
)
*
uScale
*
sci34
;
#else
temp3
=
scip2
;
temp5
=
-
(
3
*
rr1
*
rr1
)
*
sci34
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
#endif
}
force
.
x
+=
ftm21
;
force
.
y
+=
ftm22
;
force
.
z
+=
ftm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionT1
(
#else
computeOneInteractionT1NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
#ifdef APPLY_SCALE
,
float
dScale
,
float
pScale
,
float
mScale
#endif
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
real
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
ck
=
atom2
.
q
;
real
dk1
=
atom2
.
dipole
.
x
;
real
dk2
=
atom2
.
dipole
.
y
;
real
dk3
=
atom2
.
dipole
.
z
;
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
real
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
real
rr2
=
rr1
*
rr1
;
real
rr3
=
rr1
*
rr2
;
real
rr5
=
3
*
rr3
*
rr2
;
real
rr7
=
5
*
rr5
*
rr2
;
real
rr9
=
7
*
rr7
*
rr2
;
real
scale
=
1
-
mScale
;
real
prefactor
=
scale
*
rr3
-
bn1
;
#else
real
prefactor
=
-
bn1
;
#endif
real
dixdk1
=
di2
*
dk3
-
di3
*
dk2
;
real
ttm21
=
prefactor
*
dixdk1
;
real
dixdk2
=
di3
*
dk1
-
di1
*
dk3
;
real
ttm22
=
prefactor
*
dixdk2
;
real
dixdk3
=
di1
*
dk2
-
di2
*
dk1
;
real
ttm23
=
prefactor
*
dixdk3
;
real
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
real
sc6
=
0
;
real
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
;
#ifdef APPLY_SCALE
real
gfr2
=
-
ck
*
rr3
+
sc4
*
rr5
;
prefactor
=
(
gf2
-
scale
*
gfr2
);
#else
prefactor
=
gf2
;
#endif
ttm21
+=
prefactor
*
(
di2
*
zr
-
di3
*
yr
);
ttm22
+=
prefactor
*
(
di3
*
xr
-
di1
*
zr
);
ttm23
+=
prefactor
*
(
di1
*
yr
-
di2
*
xr
);
atom1
.
torque
.
x
+=
ttm21
;
atom1
.
torque
.
y
+=
ttm22
;
atom1
.
torque
.
z
+=
ttm23
;
}
__device__
void
#ifdef APPLY_SCALE
computeOneInteractionT2
(
#else
computeOneInteractionT2NoScale
(
#endif
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
#ifdef APPLY_SCALE
,
float
dScale
,
float
pScale
,
float
mScale
#endif
)
{
real
xr
=
delta
.
x
;
real
yr
=
delta
.
y
;
real
zr
=
delta
.
z
;
real
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
real
di1
=
atom1
.
dipole
.
x
;
real
di2
=
atom1
.
dipole
.
y
;
real
di3
=
atom1
.
dipole
.
z
;
real
bn1
=
bn
.
x
;
real
bn2
=
bn
.
y
;
real
bn3
=
bn
.
z
;
// apply Thole polarization damping to scale factors
real
scale3
=
1
;
real
scale5
=
1
;
real
scale7
=
1
;
real
damp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
damp
!=
0
)
{
real
pgamma
=
atom1
.
thole
<
atom2
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
ratio
=
RECIP
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
real
expdamp
=
EXP
(
damp
);
scale3
=
1
-
expdamp
;
scale5
=
1
-
(
1
-
damp
)
*
expdamp
;
scale7
=
1
-
(
1
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
;
}
real
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
real
dsc3
=
rr3
*
(
1
-
scale3
*
dScale
);
real
dsc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
*
dScale
);
real
dsc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
dScale
);
real
psc3
=
rr3
*
(
1
-
scale3
*
pScale
);
real
psc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
*
pScale
);
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
*
pScale
);
#else
real
psc3
=
rr3
*
(
1
-
scale3
);
real
psc5
=
(
3
*
rr3
*
rr1
*
rr1
)
*
(
1
-
scale5
);
real
psc7
=
(
15
*
rr3
*
rr3
*
rr1
)
*
(
1
-
scale7
);
#endif
real
prefactor1
=
0.5
f
*
(
psc3
-
bn1
);
#ifdef APPLY_SCALE
real
prefactor2
=
0.5
f
*
(
dsc3
-
bn1
);
#endif
real
dixuk1
=
di2
*
atom2
.
inducedDipole
.
z
-
di3
*
atom2
.
inducedDipole
.
y
;
real
dixukp1
=
di2
*
atom2
.
inducedDipolePolar
.
z
-
di3
*
atom2
.
inducedDipolePolar
.
y
;
#ifdef APPLY_SCALE
real
ttm2i1
=
prefactor1
*
dixuk1
+
prefactor2
*
dixukp1
;
#else
real
ttm2i1
=
prefactor1
*
(
dixuk1
+
dixukp1
);
#endif
real
dixuk2
=
di3
*
atom2
.
inducedDipole
.
x
-
di1
*
atom2
.
inducedDipole
.
z
;
real
dixukp2
=
di3
*
atom2
.
inducedDipolePolar
.
x
-
di1
*
atom2
.
inducedDipolePolar
.
z
;
#ifdef APPLY_SCALE
real
ttm2i2
=
prefactor1
*
dixuk2
+
prefactor2
*
dixukp2
;
#else
real
ttm2i2
=
prefactor1
*
(
dixuk2
+
dixukp2
);
#endif
real
dixuk3
=
di1
*
atom2
.
inducedDipole
.
y
-
di2
*
atom2
.
inducedDipole
.
x
;
real
dixukp3
=
di1
*
atom2
.
inducedDipolePolar
.
y
-
di2
*
atom2
.
inducedDipolePolar
.
x
;
#ifdef APPLY_SCALE
real
ttm2i3
=
prefactor1
*
dixuk3
+
prefactor2
*
dixukp3
;
#else
real
ttm2i3
=
prefactor1
*
(
dixuk3
+
dixukp3
);
#endif
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
real
gti2
=
bn2
*
(
sci4
+
scip4
);
#ifdef APPLY_SCALE
real
gtri2
=
(
sci4
*
psc5
+
scip4
*
dsc5
);
#else
real
gtri2
=
psc5
*
(
sci4
+
scip4
);
#endif
prefactor1
=
0.5
f
*
(
gti2
-
gtri2
);
ttm2i1
+=
prefactor1
*
(
di2
*
zr
-
di3
*
yr
);
ttm2i2
+=
prefactor1
*
(
di3
*
xr
-
di1
*
zr
);
ttm2i3
+=
prefactor1
*
(
di1
*
yr
-
di2
*
xr
);
atom1
.
torque
.
x
+=
ttm2i1
;
atom1
.
torque
.
y
+=
ttm2i2
;
atom1
.
torque
.
z
+=
ttm2i3
;
}
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
b7088b74
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real3
pos
,
force
,
torque
,
dipole
,
inducedDipole
,
inducedDipolePolar
;
real3
pos
,
force
,
torque
,
inducedDipole
,
inducedDipolePolar
,
sphericalDipole
;
real
q
;
float
thole
,
damp
;
#ifdef INCLUDE_QUADRUPOLES
real
quadrupoleXX
,
quadrupoleXY
,
quadrupoleXZ
,
quadrupoleYY
,
quadrupoleYZ
;
float
padding
;
real
sphericalQuadrupole
[
5
];
#endif
}
AtomData
;
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionF2
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
,
float
dScale
,
float
pScale
,
float
mScale
);
__device__
void
computeOneInteractionT2
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
,
float
dScale
,
float
pScale
,
float
mScale
);
__device__
void
computeOneInteractionF1NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
real
bn5
,
float
forceFactor
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionF2NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
real4
delta
,
real4
bn
,
float
forceFactor
,
real3
&
force
,
real
&
energy
);
__device__
void
computeOneInteractionT1NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
);
__device__
void
computeOneInteractionT2NoScale
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
const
real4
delta
,
const
real4
bn
);
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
sphericalDipole
,
const
real
*
__restrict__
sphericalQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
real4
atomPosq
=
posq
[
atom
];
data
.
pos
=
make_real3
(
atomPosq
.
x
,
atomPosq
.
y
,
atomPosq
.
z
);
data
.
q
=
atomPosq
.
w
;
data
.
d
ipole
.
x
=
labFrame
Dipole
[
atom
*
3
];
data
.
d
ipole
.
y
=
labFrame
Dipole
[
atom
*
3
+
1
];
data
.
d
ipole
.
z
=
labFrame
Dipole
[
atom
*
3
+
2
];
data
.
sphericalD
ipole
.
x
=
spherical
Dipole
[
atom
*
3
];
data
.
sphericalD
ipole
.
y
=
spherical
Dipole
[
atom
*
3
+
1
];
data
.
sphericalD
ipole
.
z
=
spherical
Dipole
[
atom
*
3
+
2
];
#ifdef INCLUDE_QUADRUPOLES
data
.
quadrupoleXX
=
labFrame
Quadrupole
[
atom
*
5
];
data
.
quadrupoleXY
=
labFrame
Quadrupole
[
atom
*
5
+
1
];
data
.
quadrupoleXZ
=
labFrame
Quadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrame
Quadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrame
Quadrupole
[
atom
*
5
+
4
];
data
.
sphericalQuadrupole
[
0
]
=
spherical
Quadrupole
[
atom
*
5
];
data
.
sphericalQuadrupole
[
1
]
=
spherical
Quadrupole
[
atom
*
5
+
1
];
data
.
sphericalQuadrupole
[
2
]
=
spherical
Quadrupole
[
atom
*
5
+
2
];
data
.
sphericalQuadrupole
[
3
]
=
spherical
Quadrupole
[
atom
*
5
+
3
];
data
.
sphericalQuadrupole
[
4
]
=
spherical
Quadrupole
[
atom
*
5
+
4
];
#endif
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
...
...
@@ -66,99 +57,354 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
delta
;
// Compute the displacement.
real3
delta
;
delta
.
x
=
atom2
.
pos
.
x
-
atom1
.
pos
.
x
;
delta
.
y
=
atom2
.
pos
.
y
-
atom1
.
pos
.
y
;
delta
.
z
=
atom2
.
pos
.
z
-
atom1
.
pos
.
z
;
// periodic box
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
CUTOFF_SQUARED
)
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
>
CUTOFF_SQUARED
)
return
;
real
r
=
SQRT
(
delta
.
w
);
real
r
alpha
=
EWALD_ALPHA
*
r
;
real
r
Inv
=
R
SQRT
(
r2
);
real
r
=
r2
*
rInv
;
real
alsq2
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
alsq2n
=
0
;
if
(
EWALD_ALPHA
>
0
)
alsq2n
=
RECIP
(
SQRT_PI
*
EWALD_ALPHA
);
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
// Rotate the various dipoles and quadrupoles.
real
qiRotationMatrix
[
3
][
3
];
buildQIRotationMatrix
(
delta
,
rInv
,
qiRotationMatrix
);
real
rr1
=
RECIP
(
r
);
delta
.
w
=
rr1
;
real3
qiUindI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipole
.
z
);
real3
qiUindJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipole
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipole
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipole
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipole
.
z
);
real3
qiUinpI
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom1
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom1
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom1
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom1
.
inducedDipolePolar
.
z
);
real3
qiUinpJ
=
0.5
f
*
make_real3
(
qiRotationMatrix
[
0
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
0
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
0
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
1
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
1
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
1
][
0
]
*
atom2
.
inducedDipolePolar
.
z
,
qiRotationMatrix
[
2
][
1
]
*
atom2
.
inducedDipolePolar
.
x
+
qiRotationMatrix
[
2
][
2
]
*
atom2
.
inducedDipolePolar
.
y
+
qiRotationMatrix
[
2
][
0
]
*
atom2
.
inducedDipolePolar
.
z
);
real3
rotatedDipole1
=
rotateDipole
(
atom1
.
sphericalDipole
,
qiRotationMatrix
);
real3
rotatedDipole2
=
rotateDipole
(
atom2
.
sphericalDipole
,
qiRotationMatrix
);
real
rotatedQuadrupole1
[]
=
{
0
,
0
,
0
,
0
,
0
};
real
rotatedQuadrupole2
[]
=
{
0
,
0
,
0
,
0
,
0
};
#ifdef INCLUDE_QUADRUPOLES
rotateQuadupoles
(
qiRotationMatrix
,
atom1
.
sphericalQuadrupole
,
atom2
.
sphericalQuadrupole
,
rotatedQuadrupole1
,
rotatedQuadrupole2
);
#endif
// The field derivatives at I due to permanent and induced moments on J, and vice-versa.
// Also, their derivatives w.r.t. R, which are needed for force calculations
real
Vij
[
9
],
Vji
[
9
],
VjiR
[
9
],
VijR
[
9
];
// The field derivatives at I due to only permanent moments on J, and vice-versa.
real
Vijp
[
3
],
Vijd
[
3
],
Vjip
[
3
],
Vjid
[
3
];
real
rInvVec
[
7
],
alphaRVec
[
8
],
bVec
[
5
];
// The rInvVec array is defined such that the ith element is R^-i, with the
// dieleectric constant folded in, to avoid conversions later.
rInvVec
[
1
]
=
rInv
;
for
(
int
i
=
2
;
i
<
7
;
++
i
)
rInvVec
[
i
]
=
rInvVec
[
i
-
1
]
*
rInv
;
// The alpharVec array is defined such that the ith element is (alpha R)^i,
// where kappa (alpha in OpenMM parlance) is the Ewald attenuation parameter.
real
ralpha
=
EWALD_ALPHA
*
r
;
real
exp2a
=
EXP
(
-
(
ralpha
*
ralpha
));
#ifdef USE_DOUBLE_PRECISION
const
real
erf
c
AlphaR
=
erf
c
(
ralpha
);
const
real
erfAlphaR
=
erf
(
ralpha
);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const
real
t
=
RECIP
(
1.0
f
+
0.3275911
f
*
ralpha
);
const
real
erf
c
AlphaR
=
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
const
real
erfAlphaR
=
1
-
(
0.254829592
f
+
(
-
0.284496736
f
+
(
1.421413741
f
+
(
-
1.453152027
f
+
1.061405429
f
*
t
)
*
t
)
*
t
)
*
t
)
*
t
*
exp2a
;
#endif
real
bn0
=
erfcAlphaR
*
rr1
;
energy
+=
forceFactor
*
atom1
.
q
*
atom2
.
q
*
bn0
;
real
rr2
=
rr1
*
rr1
;
alsq2n
*=
alsq2
;
real4
bn
;
bn
.
x
=
(
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
y
=
(
3
*
bn
.
x
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
z
=
(
5
*
bn
.
y
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
w
=
(
7
*
bn
.
z
+
alsq2n
*
exp2a
)
*
rr2
;
alphaRVec
[
1
]
=
ralpha
;
for
(
int
i
=
2
;
i
<
8
;
++
i
)
alphaRVec
[
i
]
=
alphaRVec
[
i
-
1
]
*
ralpha
;
real
X
=
2
*
exp2a
/
SQRT_PI
;
int
doubleFactorial
=
1
,
facCount
=
1
;
real
tmp
=
alphaRVec
[
1
];
bVec
[
1
]
=
-
erfAlphaR
;
for
(
int
i
=
2
;
i
<
5
;
++
i
)
{
bVec
[
i
]
=
bVec
[
i
-
1
]
+
tmp
*
X
/
(
real
)(
doubleFactorial
);
facCount
=
facCount
+
2
;
doubleFactorial
=
doubleFactorial
*
facCount
;
tmp
*=
2
*
alphaRVec
[
2
];
}
alsq2n
*=
alsq2
;
real
bn5
=
(
9
*
bn
.
w
+
alsq2n
*
exp2a
)
*
rr2
;
real
dmp
=
atom1
.
damp
*
atom2
.
damp
;
real
a
=
min
(
atom1
.
thole
,
atom2
.
thole
);
real
u
=
fabs
(
dmp
)
>
1.0e-5
f
?
r
/
dmp
:
1e10
f
;
real
au3
=
a
*
u
*
u
*
u
;
real
expau3
=
au3
<
50
?
EXP
(
-
au3
)
:
0
;
real
a2u6
=
au3
*
au3
;
real
a3u9
=
a2u6
*
au3
;
// Thole damping factors for energies
real
thole_c
=
1
-
expau3
;
real
thole_d0
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
thole_d1
=
1
-
expau3
;
real
thole_q0
=
1
-
expau3
*
(
1
+
au3
+
a2u6
);
real
thole_q1
=
1
-
expau3
*
(
1
+
au3
);
// Thole damping factors for derivatives
real
dthole_c
=
1
-
expau3
*
(
1
+
1.5
f
*
au3
);
real
dthole_d0
=
1
-
expau3
*
(
1
+
au3
+
1.5
f
*
a2u6
);
real
dthole_d1
=
1
-
expau3
*
(
1
+
au3
);
real
dthole_q0
=
1
-
expau3
*
(
1
+
au3
+
0.25
f
*
a2u6
+
0.75
f
*
a3u9
);
real
dthole_q1
=
1
-
expau3
*
(
1
+
au3
+
0.75
f
*
a2u6
);
// Now we compute the (attenuated) Coulomb operator and its derivatives, contracted with
// permanent moments and induced dipoles. Note that the coefficient of the permanent force
// terms is half of the expected value; this is because we compute the interaction of I with
// the sum of induced and permanent moments on J, as well as the interaction of J with I's
// permanent and induced moments; doing so double counts the permanent-permanent interaction.
real
ePermCoef
,
dPermCoef
,
eUIndCoef
,
dUIndCoef
,
eUInpCoef
,
dUInpCoef
;
// C-C terms (m=0)
ePermCoef
=
rInvVec
[
1
]
*
(
mScale
+
bVec
[
2
]
-
alphaRVec
[
1
]
*
X
);
dPermCoef
=
-
0.5
f
*
(
mScale
+
bVec
[
2
])
*
rInvVec
[
2
];
Vij
[
0
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
=
ePermCoef
*
atom1
.
q
;
VijR
[
0
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
=
dPermCoef
*
atom1
.
q
;
// C-D and C-Uind terms (m=0)
ePermCoef
=
rInvVec
[
2
]
*
(
mScale
+
bVec
[
2
]);
eUIndCoef
=
rInvVec
[
2
]
*
(
pScale
*
thole_c
+
bVec
[
2
]);
eUInpCoef
=
rInvVec
[
2
]
*
(
dScale
*
thole_c
+
bVec
[
2
]);
dPermCoef
=
-
rInvVec
[
3
]
*
(
mScale
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
dUIndCoef
=
-
2
*
rInvVec
[
3
]
*
(
pScale
*
dthole_c
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
dUInpCoef
=
-
2
*
rInvVec
[
3
]
*
(
dScale
*
dthole_c
+
bVec
[
2
]
+
alphaRVec
[
3
]
*
X
);
Vij
[
0
]
+=
-
(
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
=
-
(
ePermCoef
*
atom1
.
q
);
VijR
[
0
]
+=
-
(
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
=
-
(
dPermCoef
*
atom1
.
q
);
Vjip
[
0
]
=
-
(
eUInpCoef
*
atom1
.
q
);
Vjid
[
0
]
=
-
(
eUIndCoef
*
atom1
.
q
);
// D-C and Uind-C terms (m=0)
Vij
[
1
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
=
eUInpCoef
*
atom2
.
q
;
Vijd
[
0
]
=
eUIndCoef
*
atom2
.
q
;
// D-D and D-Uind terms (m=0)
const
real
twoThirds
=
(
real
)
2
/
3
;
ePermCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
eUIndCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
pScale
*
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
eUInpCoef
=
-
twoThirds
*
rInvVec
[
3
]
*
(
3
*
(
dScale
*
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
dPermCoef
=
rInvVec
[
4
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
2
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
rInvVec
[
4
]
*
(
6
*
(
pScale
*
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
rInvVec
[
4
]
*
(
6
*
(
dScale
*
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
Vij
[
1
]
+=
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
;
Vji
[
1
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
;
VjiR
[
1
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
rotatedDipole2
.
x
;
Vijd
[
0
]
+=
eUIndCoef
*
rotatedDipole2
.
x
;
Vjip
[
0
]
+=
eUInpCoef
*
rotatedDipole1
.
x
;
Vjid
[
0
]
+=
eUIndCoef
*
rotatedDipole1
.
x
;
// D-D and D-Uind terms (m=1)
ePermCoef
=
rInvVec
[
3
]
*
(
mScale
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
eUIndCoef
=
rInvVec
[
3
]
*
(
pScale
*
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
eUInpCoef
=
rInvVec
[
3
]
*
(
dScale
*
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
dPermCoef
=
-
1.5
f
*
rInvVec
[
4
]
*
(
mScale
+
bVec
[
3
]);
dUIndCoef
=
-
3
*
rInvVec
[
4
]
*
(
pScale
*
dthole_d1
+
bVec
[
3
]);
dUInpCoef
=
-
3
*
rInvVec
[
4
]
*
(
dScale
*
dthole_d1
+
bVec
[
3
]);
Vij
[
2
]
=
ePermCoef
*
rotatedDipole2
.
y
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
;
Vji
[
2
]
=
ePermCoef
*
rotatedDipole1
.
y
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
=
dPermCoef
*
rotatedDipole2
.
y
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
;
VjiR
[
2
]
=
dPermCoef
*
rotatedDipole1
.
y
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
=
ePermCoef
*
rotatedDipole2
.
z
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
;
Vji
[
3
]
=
ePermCoef
*
rotatedDipole1
.
z
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
=
dPermCoef
*
rotatedDipole2
.
z
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
;
VjiR
[
3
]
=
dPermCoef
*
rotatedDipole1
.
z
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
=
eUInpCoef
*
rotatedDipole2
.
y
;
Vijd
[
1
]
=
eUIndCoef
*
rotatedDipole2
.
y
;
Vjip
[
1
]
=
eUInpCoef
*
rotatedDipole1
.
y
;
Vjid
[
1
]
=
eUIndCoef
*
rotatedDipole1
.
y
;
Vijp
[
2
]
=
eUInpCoef
*
rotatedDipole2
.
z
;
Vijd
[
2
]
=
eUIndCoef
*
rotatedDipole2
.
z
;
Vjip
[
2
]
=
eUInpCoef
*
rotatedDipole1
.
z
;
Vjid
[
2
]
=
eUIndCoef
*
rotatedDipole1
.
z
;
// C-Q terms (m=0)
ePermCoef
=
(
mScale
+
bVec
[
3
])
*
rInvVec
[
3
];
dPermCoef
=
-
((
real
)
1
/
3
)
*
rInvVec
[
4
]
*
(
4.5
f
*
(
mScale
+
bVec
[
3
])
+
2
*
alphaRVec
[
5
]
*
X
);
Vij
[
0
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
=
ePermCoef
*
atom1
.
q
;
VijR
[
0
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
=
dPermCoef
*
atom1
.
q
;
// Q-C terms (m=0)
Vij
[
4
]
=
ePermCoef
*
atom2
.
q
;
Vji
[
0
]
+=
ePermCoef
*
rotatedQuadrupole1
[
0
];
VijR
[
4
]
=
dPermCoef
*
atom2
.
q
;
VjiR
[
0
]
+=
dPermCoef
*
rotatedQuadrupole1
[
0
];
// D-Q and Uind-Q terms (m=0)
const
real
fourThirds
=
(
real
)
4
/
3
;
ePermCoef
=
rInvVec
[
4
]
*
(
3
*
(
mScale
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
eUIndCoef
=
rInvVec
[
4
]
*
(
3
*
(
pScale
*
thole_q0
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
eUInpCoef
=
rInvVec
[
4
]
*
(
3
*
(
dScale
*
thole_q0
+
bVec
[
3
])
+
fourThirds
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
4.5
f
*
(
mScale
+
bVec
[
3
])
+
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
9
*
(
pScale
*
dthole_q0
+
bVec
[
3
])
+
2
*
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
-
fourThirds
*
rInvVec
[
5
]
*
(
9
*
(
dScale
*
dthole_q0
+
bVec
[
3
])
+
2
*
(
1
+
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
Vij
[
1
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
+=
ePermCoef
*
rotatedDipole1
.
x
+
eUIndCoef
*
qiUindI
.
x
+
eUInpCoef
*
qiUinpI
.
x
;
VijR
[
1
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
+=
dPermCoef
*
rotatedDipole1
.
x
+
dUIndCoef
*
qiUindI
.
x
+
dUInpCoef
*
qiUinpI
.
x
;
Vijp
[
0
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
0
];
Vijd
[
0
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
0
];
// Q-D and Q-Uind terms (m=0)
Vij
[
4
]
+=
-
(
ePermCoef
*
rotatedDipole2
.
x
+
eUIndCoef
*
qiUindJ
.
x
+
eUInpCoef
*
qiUinpJ
.
x
);
Vji
[
1
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
0
]);
VijR
[
4
]
+=
-
(
dPermCoef
*
rotatedDipole2
.
x
+
dUIndCoef
*
qiUindJ
.
x
+
dUInpCoef
*
qiUinpJ
.
x
);
VjiR
[
1
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
0
]);
Vjip
[
0
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
0
]);
Vjid
[
0
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
0
]);
// D-Q and Uind-Q terms (m=1)
const
real
sqrtThree
=
SQRT
((
real
)
3
);
ePermCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
mScale
+
bVec
[
3
]);
eUIndCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
pScale
*
thole_q1
+
bVec
[
3
]);
eUInpCoef
=
-
sqrtThree
*
rInvVec
[
4
]
*
(
dScale
*
thole_q1
+
bVec
[
3
]);
const
real
fourSqrtOneThird
=
4
/
sqrt
((
real
)
3
);
dPermCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
1.5
f
*
(
mScale
+
bVec
[
3
])
+
0.5
f
*
alphaRVec
[
5
]
*
X
);
dUIndCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
3
*
(
pScale
*
dthole_q1
+
bVec
[
3
])
+
alphaRVec
[
5
]
*
X
);
dUInpCoef
=
fourSqrtOneThird
*
rInvVec
[
5
]
*
(
3
*
(
dScale
*
dthole_q1
+
bVec
[
3
])
+
alphaRVec
[
5
]
*
X
);
Vij
[
2
]
+=
ePermCoef
*
rotatedQuadrupole2
[
1
];
Vji
[
5
]
=
ePermCoef
*
rotatedDipole1
.
y
+
eUIndCoef
*
qiUindI
.
y
+
eUInpCoef
*
qiUinpI
.
y
;
VijR
[
2
]
+=
dPermCoef
*
rotatedQuadrupole2
[
1
];
VjiR
[
5
]
=
dPermCoef
*
rotatedDipole1
.
y
+
dUIndCoef
*
qiUindI
.
y
+
dUInpCoef
*
qiUinpI
.
y
;
Vij
[
3
]
+=
ePermCoef
*
rotatedQuadrupole2
[
2
];
Vji
[
6
]
=
ePermCoef
*
rotatedDipole1
.
z
+
eUIndCoef
*
qiUindI
.
z
+
eUInpCoef
*
qiUinpI
.
z
;
VijR
[
3
]
+=
dPermCoef
*
rotatedQuadrupole2
[
2
];
VjiR
[
6
]
=
dPermCoef
*
rotatedDipole1
.
z
+
dUIndCoef
*
qiUindI
.
z
+
dUInpCoef
*
qiUinpI
.
z
;
Vijp
[
1
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
1
];
Vijd
[
1
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
1
];
Vijp
[
2
]
+=
eUInpCoef
*
rotatedQuadrupole2
[
2
];
Vijd
[
2
]
+=
eUIndCoef
*
rotatedQuadrupole2
[
2
];
// D-Q and Uind-Q terms (m=1)
Vij
[
5
]
=
-
(
ePermCoef
*
rotatedDipole2
.
y
+
eUIndCoef
*
qiUindJ
.
y
+
eUInpCoef
*
qiUinpJ
.
y
);
Vji
[
2
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
1
]);
VijR
[
5
]
=
-
(
dPermCoef
*
rotatedDipole2
.
y
+
dUIndCoef
*
qiUindJ
.
y
+
dUInpCoef
*
qiUinpJ
.
y
);
VjiR
[
2
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
1
]);
Vij
[
6
]
=
-
(
ePermCoef
*
rotatedDipole2
.
z
+
eUIndCoef
*
qiUindJ
.
z
+
eUInpCoef
*
qiUinpJ
.
z
);
Vji
[
3
]
+=
-
(
ePermCoef
*
rotatedQuadrupole1
[
2
]);
VijR
[
6
]
=
-
(
dPermCoef
*
rotatedDipole2
.
z
+
dUIndCoef
*
qiUindJ
.
z
+
dUInpCoef
*
qiUinpJ
.
z
);
VjiR
[
3
]
+=
-
(
dPermCoef
*
rotatedQuadrupole1
[
2
]);
Vjip
[
1
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
1
]);
Vjid
[
1
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
1
]);
Vjip
[
2
]
+=
-
(
eUInpCoef
*
rotatedQuadrupole1
[
2
]);
Vjid
[
2
]
+=
-
(
eUIndCoef
*
rotatedQuadrupole1
[
2
]);
// Q-Q terms (m=0)
ePermCoef
=
rInvVec
[
5
]
*
(
6
*
(
mScale
+
bVec
[
4
])
+
((
real
)
4
/
45
)
*
(
-
3
+
10
*
alphaRVec
[
2
])
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
rInvVec
[
6
]
*
(
135
*
(
mScale
+
bVec
[
4
])
+
4
*
(
1
+
2
*
alphaRVec
[
2
])
*
alphaRVec
[
7
]
*
X
)
/
9
;
Vij
[
4
]
+=
ePermCoef
*
rotatedQuadrupole2
[
0
];
Vji
[
4
]
+=
ePermCoef
*
rotatedQuadrupole1
[
0
];
VijR
[
4
]
+=
dPermCoef
*
rotatedQuadrupole2
[
0
];
VjiR
[
4
]
+=
dPermCoef
*
rotatedQuadrupole1
[
0
];
// Q-Q terms (m=1)
const
real
fourOverFifteen
=
(
real
)
4
/
15
;
ePermCoef
=
-
fourOverFifteen
*
rInvVec
[
5
]
*
(
15
*
(
mScale
+
bVec
[
4
])
+
alphaRVec
[
5
]
*
X
);
dPermCoef
=
rInvVec
[
6
]
*
(
10
*
(
mScale
+
bVec
[
4
])
+
fourThirds
*
alphaRVec
[
7
]
*
X
);
Vij
[
5
]
+=
ePermCoef
*
rotatedQuadrupole2
[
1
];
Vji
[
5
]
+=
ePermCoef
*
rotatedQuadrupole1
[
1
];
VijR
[
5
]
+=
dPermCoef
*
rotatedQuadrupole2
[
1
];
VjiR
[
5
]
+=
dPermCoef
*
rotatedQuadrupole1
[
1
];
Vij
[
6
]
+=
ePermCoef
*
rotatedQuadrupole2
[
2
];
Vji
[
6
]
+=
ePermCoef
*
rotatedQuadrupole1
[
2
];
VijR
[
6
]
+=
dPermCoef
*
rotatedQuadrupole2
[
2
];
VjiR
[
6
]
+=
dPermCoef
*
rotatedQuadrupole1
[
2
];
// Q-Q terms (m=2)
ePermCoef
=
rInvVec
[
5
]
*
(
mScale
+
bVec
[
4
]
-
fourOverFifteen
*
alphaRVec
[
5
]
*
X
);
dPermCoef
=
-
2.5
f
*
(
mScale
+
bVec
[
4
])
*
rInvVec
[
6
];
Vij
[
7
]
=
ePermCoef
*
rotatedQuadrupole2
[
3
];
Vji
[
7
]
=
ePermCoef
*
rotatedQuadrupole1
[
3
];
VijR
[
7
]
=
dPermCoef
*
rotatedQuadrupole2
[
3
];
VjiR
[
7
]
=
dPermCoef
*
rotatedQuadrupole1
[
3
];
Vij
[
8
]
=
ePermCoef
*
rotatedQuadrupole2
[
4
];
Vji
[
8
]
=
ePermCoef
*
rotatedQuadrupole1
[
4
];
VijR
[
8
]
=
dPermCoef
*
rotatedQuadrupole2
[
4
];
VjiR
[
8
]
=
dPermCoef
*
rotatedQuadrupole1
[
4
];
// Evaluate the energies, forces and torques due to permanent+induced moments
// interacting with just the permanent moments.
energy
+=
forceFactor
*
0.5
f
*
(
atom1
.
q
*
Vij
[
0
]
+
rotatedDipole1
.
x
*
Vij
[
1
]
+
rotatedDipole1
.
y
*
Vij
[
2
]
+
rotatedDipole1
.
z
*
Vij
[
3
]
+
rotatedQuadrupole1
[
0
]
*
Vij
[
4
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
5
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
6
]
+
rotatedQuadrupole1
[
3
]
*
Vij
[
7
]
+
rotatedQuadrupole1
[
4
]
*
Vij
[
8
]
+
atom2
.
q
*
Vji
[
0
]
+
rotatedDipole2
.
x
*
Vji
[
1
]
+
rotatedDipole2
.
y
*
Vji
[
2
]
+
rotatedDipole2
.
z
*
Vji
[
3
]
+
rotatedQuadrupole2
[
0
]
*
Vji
[
4
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
5
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
6
]
+
rotatedQuadrupole2
[
3
]
*
Vji
[
7
]
+
rotatedQuadrupole2
[
4
]
*
Vji
[
8
]);
real
fIZ
=
atom1
.
q
*
VijR
[
0
]
+
rotatedDipole1
.
x
*
VijR
[
1
]
+
rotatedDipole1
.
y
*
VijR
[
2
]
+
rotatedDipole1
.
z
*
VijR
[
3
]
+
rotatedQuadrupole1
[
0
]
*
VijR
[
4
]
+
rotatedQuadrupole1
[
1
]
*
VijR
[
5
]
+
rotatedQuadrupole1
[
2
]
*
VijR
[
6
]
+
rotatedQuadrupole1
[
3
]
*
VijR
[
7
]
+
rotatedQuadrupole1
[
4
]
*
VijR
[
8
];
real
fJZ
=
atom2
.
q
*
VjiR
[
0
]
+
rotatedDipole2
.
x
*
VjiR
[
1
]
+
rotatedDipole2
.
y
*
VjiR
[
2
]
+
rotatedDipole2
.
z
*
VjiR
[
3
]
+
rotatedQuadrupole2
[
0
]
*
VjiR
[
4
]
+
rotatedQuadrupole2
[
1
]
*
VjiR
[
5
]
+
rotatedQuadrupole2
[
2
]
*
VjiR
[
6
]
+
rotatedQuadrupole2
[
3
]
*
VjiR
[
7
]
+
rotatedQuadrupole2
[
4
]
*
VjiR
[
8
];
real
EIX
=
rotatedDipole1
.
z
*
Vij
[
1
]
-
rotatedDipole1
.
x
*
Vij
[
3
]
+
sqrtThree
*
rotatedQuadrupole1
[
2
]
*
Vij
[
4
]
+
rotatedQuadrupole1
[
4
]
*
Vij
[
5
]
-
(
sqrtThree
*
rotatedQuadrupole1
[
0
]
+
rotatedQuadrupole1
[
3
])
*
Vij
[
6
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
7
]
-
rotatedQuadrupole1
[
1
]
*
Vij
[
8
];
real
EIY
=
-
rotatedDipole1
.
y
*
Vij
[
1
]
+
rotatedDipole1
.
x
*
Vij
[
2
]
-
sqrtThree
*
rotatedQuadrupole1
[
1
]
*
Vij
[
4
]
+
(
sqrtThree
*
rotatedQuadrupole1
[
0
]
-
rotatedQuadrupole1
[
3
])
*
Vij
[
5
]
-
rotatedQuadrupole1
[
4
]
*
Vij
[
6
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
7
]
+
rotatedQuadrupole1
[
2
]
*
Vij
[
8
];
real
EIZ
=
-
rotatedDipole1
.
z
*
Vij
[
2
]
+
rotatedDipole1
.
y
*
Vij
[
3
]
-
rotatedQuadrupole1
[
2
]
*
Vij
[
5
]
+
rotatedQuadrupole1
[
1
]
*
Vij
[
6
]
-
2
*
rotatedQuadrupole1
[
4
]
*
Vij
[
7
]
+
2
*
rotatedQuadrupole1
[
3
]
*
Vij
[
8
];
real
EJX
=
rotatedDipole2
.
z
*
Vji
[
1
]
-
rotatedDipole2
.
x
*
Vji
[
3
]
+
sqrtThree
*
rotatedQuadrupole2
[
2
]
*
Vji
[
4
]
+
rotatedQuadrupole2
[
4
]
*
Vji
[
5
]
-
(
sqrtThree
*
rotatedQuadrupole2
[
0
]
+
rotatedQuadrupole2
[
3
])
*
Vji
[
6
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
7
]
-
rotatedQuadrupole2
[
1
]
*
Vji
[
8
];
real
EJY
=
-
rotatedDipole2
.
y
*
Vji
[
1
]
+
rotatedDipole2
.
x
*
Vji
[
2
]
-
sqrtThree
*
rotatedQuadrupole2
[
1
]
*
Vji
[
4
]
+
(
sqrtThree
*
rotatedQuadrupole2
[
0
]
-
rotatedQuadrupole2
[
3
])
*
Vji
[
5
]
-
rotatedQuadrupole2
[
4
]
*
Vji
[
6
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
7
]
+
rotatedQuadrupole2
[
2
]
*
Vji
[
8
];
real
EJZ
=
-
rotatedDipole2
.
z
*
Vji
[
2
]
+
rotatedDipole2
.
y
*
Vji
[
3
]
-
rotatedQuadrupole2
[
2
]
*
Vji
[
5
]
+
rotatedQuadrupole2
[
1
]
*
Vji
[
6
]
-
2
*
rotatedQuadrupole2
[
4
]
*
Vji
[
7
]
+
2
*
rotatedQuadrupole2
[
3
]
*
Vji
[
8
];
// Define the torque intermediates for the induced dipoles. These are simply the induced dipole torque
// intermediates dotted with the field due to permanent moments only, at each center. We inline the
// induced dipole torque intermediates here, for simplicity. N.B. There are no torques on the dipoles
// themselves, so we accumulate the torque intermediates into separate variables to allow them to be
// used only in the force calculation.
//
// The torque about the x axis (needed to obtain the y force on the induced dipoles, below)
// qiUindIx[0] = qiQUindI[2]; qiUindIx[1] = 0; qiUindIx[2] = -qiQUindI[0]
real
iEIX
=
qiUinpI
.
z
*
Vijp
[
0
]
+
qiUindI
.
z
*
Vijd
[
0
]
-
qiUinpI
.
x
*
Vijp
[
2
]
-
qiUindI
.
x
*
Vijd
[
2
];
real
iEJX
=
qiUinpJ
.
z
*
Vjip
[
0
]
+
qiUindJ
.
z
*
Vjid
[
0
]
-
qiUinpJ
.
x
*
Vjip
[
2
]
-
qiUindJ
.
x
*
Vjid
[
2
];
// The torque about the y axis (needed to obtain the x force on the induced dipoles, below)
// qiUindIy[0] = -qiQUindI[1]; qiUindIy[1] = qiQUindI[0]; qiUindIy[2] = 0
real
iEIY
=
qiUinpI
.
x
*
Vijp
[
1
]
+
qiUindI
.
x
*
Vijd
[
1
]
-
qiUinpI
.
y
*
Vijp
[
0
]
-
qiUindI
.
y
*
Vijd
[
0
];
real
iEJY
=
qiUinpJ
.
x
*
Vjip
[
1
]
+
qiUindJ
.
x
*
Vjid
[
1
]
-
qiUinpJ
.
y
*
Vjip
[
0
]
-
qiUindJ
.
y
*
Vjid
[
0
];
#ifdef USE_MUTUAL_POLARIZATION
// Uind-Uind terms (m=0)
real
eCoef
=
-
fourThirds
*
rInvVec
[
3
]
*
(
3
*
(
thole_d0
+
bVec
[
3
])
+
alphaRVec
[
3
]
*
X
);
real
dCoef
=
rInvVec
[
4
]
*
(
6
*
(
dthole_d0
+
bVec
[
3
])
+
4
*
alphaRVec
[
5
]
*
X
);
iEIX
+=
eCoef
*
(
qiUinpI
.
z
*
qiUindJ
.
x
+
qiUindI
.
z
*
qiUinpJ
.
x
);
iEJX
+=
eCoef
*
(
qiUinpJ
.
z
*
qiUindI
.
x
+
qiUindJ
.
z
*
qiUinpI
.
x
);
iEIY
-=
eCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
x
+
qiUindI
.
y
*
qiUinpJ
.
x
);
iEJY
-=
eCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
x
+
qiUindJ
.
y
*
qiUinpI
.
x
);
fIZ
+=
dCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
x
+
qiUindI
.
x
*
qiUinpJ
.
x
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
x
+
qiUindJ
.
x
*
qiUinpI
.
x
);
// Uind-Uind terms (m=1)
eCoef
=
2
*
rInvVec
[
3
]
*
(
thole_d1
+
bVec
[
3
]
-
twoThirds
*
alphaRVec
[
3
]
*
X
);
dCoef
=
-
3
*
rInvVec
[
4
]
*
(
dthole_d1
+
bVec
[
3
]);
iEIX
-=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
z
+
qiUindI
.
x
*
qiUinpJ
.
z
);
iEJX
-=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
z
+
qiUindJ
.
x
*
qiUinpI
.
z
);
iEIY
+=
eCoef
*
(
qiUinpI
.
x
*
qiUindJ
.
y
+
qiUindI
.
x
*
qiUinpJ
.
y
);
iEJY
+=
eCoef
*
(
qiUinpJ
.
x
*
qiUindI
.
y
+
qiUindJ
.
x
*
qiUinpI
.
y
);
fIZ
+=
dCoef
*
(
qiUinpI
.
y
*
qiUindJ
.
y
+
qiUindI
.
y
*
qiUinpJ
.
y
+
qiUinpI
.
z
*
qiUindJ
.
z
+
qiUindI
.
z
*
qiUinpJ
.
z
);
fIZ
+=
dCoef
*
(
qiUinpJ
.
y
*
qiUindI
.
y
+
qiUindJ
.
y
*
qiUinpI
.
y
+
qiUinpJ
.
z
*
qiUindI
.
z
+
qiUindJ
.
z
*
qiUinpI
.
z
);
#endif
real3
force
;
// The quasi-internal frame forces and torques. Note that the induced torque intermediates are
// used in the force expression, but not in the torques; the induced dipoles are isotropic.
real
qiForce
[
3
]
=
{
rInv
*
(
EIY
+
EJY
+
iEIY
+
iEJY
),
-
rInv
*
(
EIX
+
EJX
+
iEIX
+
iEJX
),
-
(
fJZ
+
fIZ
)};
real
qiTorqueI
[
3
]
=
{
-
EIX
,
-
EIY
,
-
EIZ
};
real
qiTorqueJ
[
3
]
=
{
-
EJX
,
-
EJY
,
-
EJZ
};
if
(
hasExclusions
)
{
computeOneInteractionF1
(
atom1
,
atom2
,
delta
,
bn
,
bn5
,
forceFactor
,
dScale
,
pScale
,
mScale
,
force
,
energy
);
computeOneInteractionF2
(
atom1
,
atom2
,
delta
,
bn
,
forceFactor
,
dScale
,
pScale
,
mScale
,
force
,
energy
);
}
else
{
computeOneInteractionF1NoScale
(
atom1
,
atom2
,
delta
,
bn
,
bn5
,
forceFactor
,
force
,
energy
);
computeOneInteractionF2NoScale
(
atom1
,
atom2
,
delta
,
bn
,
forceFactor
,
force
,
energy
);
}
real3
force
=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiForce
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiForce
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiForce
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiForce
[
2
]);
atom1
.
force
+=
force
;
if
(
forceFactor
==
1
)
atom2
.
force
-=
force
;
if
(
hasExclusions
)
{
computeOneInteractionT1
(
atom1
,
atom2
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
computeOneInteractionT2
(
atom1
,
atom2
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
}
else
{
computeOneInteractionT1NoScale
(
atom1
,
atom2
,
delta
,
bn
);
computeOneInteractionT2NoScale
(
atom1
,
atom2
,
delta
,
bn
);
}
atom1
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueI
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueI
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueI
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueI
[
2
]);
if
(
forceFactor
==
1
)
{
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta
.
x
=
-
delta
.
x
;
delta
.
y
=
-
delta
.
y
;
delta
.
z
=
-
delta
.
z
;
if
(
hasExclusions
)
{
computeOneInteractionT1
(
atom2
,
atom1
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
computeOneInteractionT2
(
atom2
,
atom1
,
delta
,
bn
,
dScale
,
pScale
,
mScale
);
}
else
{
computeOneInteractionT1NoScale
(
atom2
,
atom1
,
delta
,
bn
);
computeOneInteractionT2NoScale
(
atom2
,
atom1
,
delta
,
bn
);
}
atom2
.
force
-=
force
;
atom2
.
torque
+=
make_real3
(
qiRotationMatrix
[
1
][
1
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
1
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
1
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
2
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
2
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
2
]
*
qiTorqueJ
[
2
],
qiRotationMatrix
[
1
][
0
]
*
qiTorqueJ
[
0
]
+
qiRotationMatrix
[
2
][
0
]
*
qiTorqueJ
[
1
]
+
qiRotationMatrix
[
0
][
0
]
*
qiTorqueJ
[
2
]);
}
}
...
...
@@ -166,30 +412,27 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
* Compute the self energy and self torque.
*/
__device__
void
computeSelfEnergyAndTorque
(
AtomData
&
atom1
,
real
&
energy
)
{
real
term
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fterm
=
-
EWALD_ALPHA
/
SQRT_PI
;
real
cii
=
atom1
.
q
*
atom1
.
q
;
real
dii
=
dot
(
atom1
.
dipole
,
atom1
.
dipole
);
real3
dipole
=
make_real3
(
atom1
.
sphericalDipole
.
y
,
atom1
.
sphericalDipole
.
z
,
atom1
.
sphericalDipole
.
x
);
real
dii
=
dot
(
dipole
,
dipole
+
atom1
.
inducedDipole
);
#ifdef INCLUDE_QUADRUPOLES
real
qii
=
2
*
(
atom1
.
quadrupoleXX
*
atom1
.
quadrupoleXX
+
atom1
.
quadrupoleYY
*
atom1
.
quadrupoleYY
+
atom1
.
quadrupoleXX
*
atom1
.
quadrupoleYY
+
atom1
.
quadrupoleXY
*
atom1
.
quadrupoleXY
+
atom1
.
quadrupoleXZ
*
atom1
.
quadrupoleXZ
+
atom1
.
quadrupoleYZ
*
atom1
.
quadrupoleYZ
);
real
qii
=
(
atom1
.
sphericalQuadrupole
[
0
]
*
atom1
.
sphericalQuadrupole
[
0
]
+
atom1
.
sphericalQuadrupole
[
1
]
*
atom1
.
sphericalQuadrupole
[
1
]
+
atom1
.
sphericalQuadrupole
[
2
]
*
atom1
.
sphericalQuadrupole
[
2
]
+
atom1
.
sphericalQuadrupole
[
3
]
*
atom1
.
sphericalQuadrupole
[
3
]
+
atom1
.
sphericalQuadrupole
[
4
]
*
atom1
.
sphericalQuadrupole
[
4
]);
#else
real
qii
=
0
;
#endif
real
uii
=
dot
(
atom1
.
dipole
,
atom1
.
inducedDipole
);
real
selfEnergy
=
(
cii
+
term
*
(
dii
/
3
+
2
*
term
*
qii
/
5
));
selfEnergy
+=
term
*
uii
/
3
;
selfEnergy
*=
fterm
;
energy
+=
selfEnergy
;
real
prefac
=
-
EWALD_ALPHA
/
SQRT_PI
;
real
a2
=
EWALD_ALPHA
*
EWALD_ALPHA
;
real
a4
=
a2
*
a2
;
energy
+=
prefac
*
(
cii
+
((
real
)
2
/
3
)
*
a2
*
dii
+
((
real
)
4
/
15
)
*
a4
*
qii
);
// self-torque for PME
real3
ui
=
atom1
.
inducedDipole
+
atom1
.
inducedDipolePolar
;
atom1
.
torque
+=
((
2
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
)
*
cross
(
atom1
.
dipole
,
ui
);
atom1
.
torque
+=
((
2
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
)
*
cross
(
dipole
,
ui
);
}
/**
...
...
@@ -204,7 +447,7 @@ extern "C" __global__ void computeElectrostatics(
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#endif
const
real
*
__restrict__
labFrame
Dipole
,
const
real
*
__restrict__
labFrame
Quadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
spherical
Dipole
,
const
real
*
__restrict__
spherical
Quadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
...
...
@@ -223,7 +466,7 @@ extern "C" __global__ void computeElectrostatics(
const
unsigned
int
y
=
tileIndices
.
y
;
AtomData
data
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
uint2
covalent
=
covalentFlags
[
pos
*
TILE_SIZE
+
tgx
];
...
...
@@ -233,13 +476,13 @@ extern "C" __global__ void computeElectrostatics(
localData
[
threadIdx
.
x
].
pos
=
data
.
pos
;
localData
[
threadIdx
.
x
].
q
=
data
.
q
;
localData
[
threadIdx
.
x
].
d
ipole
=
data
.
d
ipole
;
localData
[
threadIdx
.
x
].
sphericalD
ipole
=
data
.
sphericalD
ipole
;
#ifdef INCLUDE_QUADRUPOLES
localData
[
threadIdx
.
x
].
q
uadrupole
XX
=
data
.
q
uadrupole
XX
;
localData
[
threadIdx
.
x
].
q
uadrupole
XY
=
data
.
q
uadrupole
XY
;
localData
[
threadIdx
.
x
].
q
uadrupole
XZ
=
data
.
q
uadrupole
XZ
;
localData
[
threadIdx
.
x
].
q
uadrupole
YY
=
data
.
q
uadrupole
YY
;
localData
[
threadIdx
.
x
].
q
uadrupole
YZ
=
data
.
q
uadrupole
YZ
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
0
]
=
data
.
sphericalQ
uadrupole
[
0
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
1
]
=
data
.
sphericalQ
uadrupole
[
1
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
2
]
=
data
.
sphericalQ
uadrupole
[
2
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
3
]
=
data
.
sphericalQ
uadrupole
[
3
]
;
localData
[
threadIdx
.
x
].
sphericalQ
uadrupole
[
4
]
=
data
.
sphericalQ
uadrupole
[
4
]
;
#endif
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
...
...
@@ -272,7 +515,7 @@ extern "C" __global__ void computeElectrostatics(
// This is an off-diagonal tile.
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
unsigned
int
tj
=
tgx
;
...
...
@@ -366,7 +609,7 @@ extern "C" __global__ void computeElectrostatics(
// Load atom data for this tile.
AtomData
data
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
data
,
atom1
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
data
.
torque
=
make_real3
(
0
);
#ifdef USE_CUTOFF
...
...
@@ -375,7 +618,7 @@ extern "C" __global__ void computeElectrostatics(
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrame
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
sphericalDipole
,
spherical
Quadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
localData
[
threadIdx
.
x
].
torque
=
make_real3
(
0
);
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment