Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
776f6f22
Commit
776f6f22
authored
Jul 28, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce
parent
b186314c
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
1399 additions
and
126 deletions
+1399
-126
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+60
-36
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+3
-3
plugins/amoeba/platforms/cuda2/src/kernels/amoebaVdwForce2.cu
...ins/amoeba/platforms/cuda2/src/kernels/amoebaVdwForce2.cu
+8
-9
plugins/amoeba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
...eba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
+538
-0
plugins/amoeba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
...ba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
+355
-0
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
...amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
+14
-21
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
+40
-48
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaVdwForce.cpp
...s/amoeba/platforms/cuda2/tests/TestCudaAmoebaVdwForce.cpp
+381
-9
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
776f6f22
...
...
@@ -865,7 +865,7 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
multipoleParticles
(
NULL
),
torqueBufferIndices
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
currentEpsilon
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
)
{
...
...
@@ -875,8 +875,6 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
cu
.
setAsCurrent
();
if
(
multipoleParticles
!=
NULL
)
delete
multipoleParticles
;
if
(
torqueBufferIndices
!=
NULL
)
delete
torqueBufferIndices
;
if
(
molecularDipoles
!=
NULL
)
delete
molecularDipoles
;
if
(
molecularQuadrupoles
!=
NULL
)
...
...
@@ -933,8 +931,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
multipoleParticlesVec
.
push_back
(
make_int4
(
atomX
,
atomY
,
atomZ
,
axisType
));
for
(
int
j
=
0
;
j
<
3
;
j
++
)
molecularDipolesVec
.
push_back
((
float
)
dipole
[
j
]);
for
(
int
j
=
0
;
j
<
9
;
j
++
)
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
j
]);
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
0
]);
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
1
]);
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
2
]);
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
4
]);
molecularQuadrupolesVec
.
push_back
((
float
)
quadrupole
[
5
]);
}
int
paddedNumAtoms
=
cu
.
getPaddedNumAtoms
();
for
(
int
i
=
numMultipoles
;
i
<
paddedNumAtoms
;
i
++
)
{
...
...
@@ -943,14 +944,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
multipoleParticlesVec
.
push_back
(
make_int4
(
0
,
0
,
0
,
0
));
for
(
int
j
=
0
;
j
<
3
;
j
++
)
molecularDipolesVec
.
push_back
(
0
);
for
(
int
j
=
0
;
j
<
9
;
j
++
)
for
(
int
j
=
0
;
j
<
5
;
j
++
)
molecularQuadrupolesVec
.
push_back
(
0
);
}
dampingAndThole
=
CudaArray
::
create
<
float2
>
(
cu
,
paddedNumAtoms
,
"dampingAndThole"
);
polarizability
=
CudaArray
::
create
<
float
>
(
cu
,
paddedNumAtoms
,
"polarizability"
);
multipoleParticles
=
CudaArray
::
create
<
int4
>
(
cu
,
paddedNumAtoms
,
"multipoleParticles"
);
molecularDipoles
=
CudaArray
::
create
<
float
>
(
cu
,
3
*
paddedNumAtoms
,
"molecularDipoles"
);
molecularQuadrupoles
=
CudaArray
::
create
<
float
>
(
cu
,
9
*
paddedNumAtoms
,
"molecularQuadrupoles"
);
molecularQuadrupoles
=
CudaArray
::
create
<
float
>
(
cu
,
5
*
paddedNumAtoms
,
"molecularQuadrupoles"
);
dampingAndThole
->
upload
(
dampingAndTholeVec
);
polarizability
->
upload
(
polarizabilityVec
);
multipoleParticles
->
upload
(
multipoleParticlesVec
);
...
...
@@ -962,7 +963,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
9
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
field
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"field"
);
fieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"fieldPolar"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
...
...
@@ -999,6 +1000,15 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
polarizationFlagValues
.
push_back
(
make_int2
(
i
,
atoms
[
j
]));
}
// Record other options.
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
}
else
maxInducedIterations
==
0
;
// Create the kernels.
...
...
@@ -1007,13 +1017,29 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
numMultipoles
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"SCALING_DISTANCE_CUTOFF"
]
=
cu
.
doubleToString
(
50.0
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"ENERGY_SCALE_FACTOR"
]
=
cu
.
doubleToString
(
138.9354558456
);
// DIVIDE BY INNER DIELECTRIC!!!
if
(
force
.
getPolarizationType
()
==
AmoebaMultipoleForce
::
Direct
)
defines
[
"DIRECT_POLARIZATION"
]
=
""
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoles
,
defines
);
computeMomentsKernel
=
cu
.
getKernel
(
module
,
"computeLabFrameMoments"
);
recordInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipoles"
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
stringstream
electrostaticsSource
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
multipoleElectrostatics
;
electrostaticsSource
<<
"#define F1
\n
"
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
electrostaticPairForce
;
electrostaticsSource
<<
"#undef F1
\n
"
;
electrostaticsSource
<<
"#define T1
\n
"
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
electrostaticPairForce
;
electrostaticsSource
<<
"#undef T1
\n
"
;
electrostaticsSource
<<
"#define T2
\n
"
;
electrostaticsSource
<<
CudaAmoebaKernelSources
::
electrostaticPairForce
;
module
=
cu
.
createModule
(
electrostaticsSource
.
str
(),
defines
);
electrostaticsKernel
=
cu
.
getKernel
(
module
,
"computeElectrostatics"
);
// Set up PME.
...
...
@@ -1389,30 +1415,34 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
computeMomentsArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
(),
&
molecularDipoles
->
getDevicePointer
(),
&
molecularQuadrupoles
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
()};
cu
.
executeKernel
(
computeMomentsKernel
,
computeMomentsArgs
,
cu
.
getPaddedNumAtoms
());
vector
<
float
>
d
,
q
;
labFrameDipoles
->
download
(
d
);
labFrameQuadrupoles
->
download
(
q
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d %g %g %g
\n
"
,
i
,
d
[
3
*
i
],
d
[
3
*
i
+
1
],
d
[
3
*
i
+
2
]);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d %g %g %g %g %g %g %g %g %g
\n
"
,
i
,
q
[
9
*
i
],
q
[
9
*
i
+
1
],
q
[
9
*
i
+
2
],
q
[
9
*
i
+
3
],
q
[
9
*
i
+
4
],
q
[
9
*
i
+
5
],
q
[
9
*
i
+
6
],
q
[
9
*
i
+
7
],
q
[
9
*
i
+
8
]);
cu
.
executeKernel
(
computeMomentsKernel
,
computeMomentsArgs
,
cu
.
getNumAtoms
());
int
startTileIndex
=
nb
.
getStartTileIndex
();
int
numTileIndices
=
nb
.
getNumTiles
();
int
numForceThreadBlocks
=
nb
.
getNumForceThreadBlocks
();
int
forceThreadBlockSize
=
nb
.
getForceThreadBlockSize
();
if
(
pmeGrid
==
NULL
)
{
// Compute induced dipoles.
void
*
computeFixedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusions
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeFixedFieldKernel
,
computeFixedFieldArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
vec
t
or
<
unsigned
long
long
>
f
;
field
->
download
(
f
)
;
int
pad
=
cu
.
get
Padded
NumAtoms
();
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
()
;
i
++
)
{
printf
(
"%d %g %g %g
\n
"
,
i
,
f
[
i
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
pad
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
pad
*
2
]
/
(
double
)
0xFFFFFFFF
);
v
oid
*
r
ecor
dInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()}
;
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
()
)
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
}
// Compute electrostatic force.
void
*
electrostaticsArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
return
0.0
;
}
...
...
@@ -1598,28 +1628,22 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
throw
OpenMMException
(
"Illegal combining rule for sigma: "
+
sigmaCombiningRule
);
string
epsilonCombiningRule
=
force
.
getEpsilonCombiningRule
();
if
(
epsilonCombiningRule
==
"ARITHMETIC"
)
replacements
[
"EPILON_COMBINING_RULE"
]
=
"1"
;
replacements
[
"EP
S
ILON_COMBINING_RULE"
]
=
"1"
;
else
if
(
epsilonCombiningRule
==
"GEOMETRIC"
)
replacements
[
"EPILON_COMBINING_RULE"
]
=
"2"
;
replacements
[
"EP
S
ILON_COMBINING_RULE"
]
=
"2"
;
else
if
(
epsilonCombiningRule
==
"HARMONIC"
)
replacements
[
"EPILON_COMBINING_RULE"
]
=
"3"
;
replacements
[
"EP
S
ILON_COMBINING_RULE"
]
=
"3"
;
else
if
(
epsilonCombiningRule
==
"HHG"
)
replacements
[
"EPILON_COMBINING_RULE"
]
=
"4"
;
replacements
[
"EP
S
ILON_COMBINING_RULE"
]
=
"4"
;
else
throw
OpenMMException
(
"Illegal combining rule for sigma: "
+
sigmaCombiningRule
);
double
cutoff
=
force
.
getCutoff
();
double
taperCutoff
=
cutoff
*
0.9
;
replacements
[
"CUTOFF_DISTANCE"
]
=
cu
.
doubleToString
(
force
.
getCutoff
());
replacements
[
"TAPER_CUTOFF"
]
=
cu
.
doubleToString
(
taperCutoff
);
double
cutoff2
=
cutoff
*
cutoff
;
double
taperCutoff2
=
taperCutoff
*
taperCutoff
;
double
denom
=
pow
(
cutoff
-
taperCutoff
,
-
5.0
);
replacements
[
"TAPER_C0"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff2
*
(
cutoff2
-
5.0
*
cutoff
*
taperCutoff
+
10.0
*
taperCutoff2
)
*
denom
);
replacements
[
"TAPER_C1"
]
=
cu
.
doubleToString
(
-
30.0
*
cutoff2
*
taperCutoff2
*
denom
);
replacements
[
"TAPER_C2"
]
=
cu
.
doubleToString
(
30.0
*
(
cutoff2
*
taperCutoff
+
cutoff
*
taperCutoff2
)
*
denom
);
replacements
[
"TAPER_C3"
]
=
cu
.
doubleToString
(
-
10.0
*
(
cutoff2
+
4.0
*
cutoff
*
taperCutoff
+
taperCutoff2
)
*
denom
);
replacements
[
"TAPER_C4"
]
=
cu
.
doubleToString
(
15.0
*
(
cutoff
+
taperCutoff
)
*
denom
);
replacements
[
"TAPER_C5"
]
=
cu
.
doubleToString
(
-
6.0
*
denom
);
replacements
[
"TAPER_C3"
]
=
cu
.
doubleToString
(
10
/
pow
(
taperCutoff
-
cutoff
,
3.0
));
replacements
[
"TAPER_C4"
]
=
cu
.
doubleToString
(
15
/
pow
(
taperCutoff
-
cutoff
,
4.0
));
replacements
[
"TAPER_C5"
]
=
cu
.
doubleToString
(
6
/
pow
(
taperCutoff
-
cutoff
,
5.0
));
nonbonded
->
addInteraction
(
force
.
getUseNeighborList
(),
force
.
getPBC
(),
true
,
force
.
getCutoff
(),
exclusions
,
cu
.
replaceStrings
(
CudaAmoebaKernelSources
::
amoebaVdwForce2
,
replacements
),
force
.
getForceGroup
());
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
776f6f22
...
...
@@ -374,14 +374,14 @@ public:
private:
class
ForceInfo
;
void
initializeScaleFactors
();
int
numMultipoles
;
int
numMultipoles
,
maxInducedIterations
;
double
inducedEpsilon
;
bool
hasInitializedScaleFactors
;
CudaContext
&
cu
;
System
&
system
;
std
::
vector
<
int3
>
covalentFlagValues
;
std
::
vector
<
int2
>
polarizationFlagValues
;
CudaArray
*
multipoleParticles
;
CudaArray
*
torqueBufferIndices
;
CudaArray
*
molecularDipoles
;
CudaArray
*
molecularQuadrupoles
;
CudaArray
*
labFrameDipoles
;
...
...
@@ -412,7 +412,7 @@ private:
CudaArray
*
pmeAtomGridIndex
;
CudaSort
*
sort
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
computeFixedField
Kernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
electrostatics
Kernel
;
};
/**
...
...
plugins/amoeba/platforms/cuda2/src/kernels/amoebaVdwForce2.cu
View file @
776f6f22
...
...
@@ -15,13 +15,11 @@
real
sigma
=
2
*
(
sigmaEpsilon1
.
x
*
sigma1_2
+
sigmaEpsilon2
.
x
*
sigma2_2
)
/
(
sigma1_2
+
sigma2_2
);
#endif
#if EPSILON_COMBINING_RULE == 1
real
epsilon
=
sigmaEpsilon1
.
y
+
sigmaEpsilon2
.
y
;
real
epsilon
=
0.5
f
*
(
sigmaEpsilon1
.
y
+
sigmaEpsilon2
.
y
)
;
#elif EPSILON_COMBINING_RULE == 2
real
epsilon
=
2
*
SQRT
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
);
real
epsilon
=
SQRT
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
);
#elif EPSILON_COMBINING_RULE == 3
real
epsilon1_2
=
sigmaEpsilon1
.
x
*
sigmaEpsilon1
.
x
;
real
epsilon2_2
=
sigmaEpsilon2
.
x
*
sigmaEpsilon2
.
x
;
real
epsilon
=
2
*
(
sigmaEpsilon1
.
x
*
epsilon1_2
+
sigmaEpsilon2
.
x
*
epsilon2_2
)
/
(
epsilon1_2
+
epsilon2_2
);
real
epsilon
=
2
*
(
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
)
/
(
sigmaEpsilon1
.
y
+
sigmaEpsilon2
.
y
);
#else
real
epsilon_s
=
SQRT
(
sigmaEpsilon1
.
y
)
+
SQRT
(
sigmaEpsilon2
.
y
);
real
epsilon
=
4
*
sigmaEpsilon1
.
y
*
sigmaEpsilon2
.
y
/
(
epsilon_s
*
epsilon_s
);
...
...
@@ -39,13 +37,14 @@
real
tmp
=
sigma7
*
invRho
;
real
gTau
=
epsilon
*
tau7
*
r6
*
1.12
f
*
tmp
*
tmp
;
real
termEnergy
=
epsilon
*
sigma7
*
tau7
*
((
sigma7
*
1.12
f
*
invRho
)
-
2.0
f
);
real
deltaE
=
(
-
7.0
f
*
(
dTau
*
termEnergy
+
gTau
)
)
*
invR
;
real
deltaE
=
-
7.0
f
*
(
dTau
*
termEnergy
+
gTau
);
if
(
r
>
TAPER_CUTOFF
)
{
real
taper
=
TAPER_C0
+
r
*
(
TAPER_C1
+
r
*
(
TAPER_C2
+
r
*
(
TAPER_C3
+
r
*
(
TAPER_C4
+
r
*
TAPER_C5
))));
real
dtaper
=
TAPER_C1
+
r
*
(
2
*
TAPER_C2
+
r
*
(
3
*
TAPER_C3
+
r
*
(
4
*
TAPER_C4
+
r
*
5
*
TAPER_C5
)));
real
x
=
r
-
TAPER_CUTOFF
;
real
taper
=
1
+
x
*
x
*
x
*
(
TAPER_C3
+
x
*
(
TAPER_C4
+
x
*
TAPER_C5
));
real
dtaper
=
x
*
x
*
(
3
*
TAPER_C3
+
x
*
(
4
*
TAPER_C4
+
x
*
5
*
TAPER_C5
));
deltaE
=
termEnergy
*
dtaper
+
deltaE
*
taper
;
termEnergy
*=
taper
;
}
tempEnergy
+=
(
includeInteraction
?
termEnergy
:
0
);
dEdR
-=
(
includeInteraction
?
deltaE
:
0
);
dEdR
-=
(
includeInteraction
?
deltaE
*
invR
:
0
);
}
plugins/amoeba/platforms/cuda2/src/kernels/electrostaticPairForce.cu
0 → 100644
View file @
776f6f22
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real
&
energy
,
real3
&
outputForce
)
{
#elif defined T1
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#else
__device__
void
computeOneInteractionT3
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
)
{
#endif
#ifdef F1
const
float
uScale
=
1
;
real
ddsc3_0
=
0
;
real
ddsc3_1
=
0
;
real
ddsc3_2
=
0
;
real
ddsc5_0
=
0
;
real
ddsc5_1
=
0
;
real
ddsc5_2
=
0
;
real
ddsc7_0
=
0
;
real
ddsc7_1
=
0
;
real
ddsc7_2
=
0
;
#endif
real
xr
=
atom2
.
posq
.
x
-
atom1
.
posq
.
x
;
real
yr
=
atom2
.
posq
.
y
-
atom1
.
posq
.
y
;
real
zr
=
atom2
.
posq
.
z
-
atom1
.
posq
.
z
;
real
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
real
r
=
SQRT
(
r2
);
real
rr1
=
RECIP
(
r
);
real
rr2
=
rr1
*
rr1
;
real
rr3
=
rr1
*
rr2
;
real
rr5
=
3
*
rr3
*
rr2
;
real
rr7
=
5
*
rr5
*
rr2
;
real
rr9
=
7
*
rr7
*
rr2
;
#ifdef F1
real
rr11
=
9
*
rr9
*
rr2
;
#endif
real
scale3
=
1
;
real
scale5
=
1
;
real
scale7
=
1
;
real
pdamp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
pdamp
!=
0
)
{
real
ratio
=
r
/
pdamp
;
float
pGamma
=
atom2
.
thole
>
atom1
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
real
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
real
dampExp
=
EXP
(
-
damp
);
real
damp1
=
damp
+
1
;
real
damp2
=
damp
*
damp
;
scale3
=
1
-
dampExp
;
scale5
=
1
-
damp1
*
dampExp
;
scale7
=
1
-
(
damp1
+
0.6
f
*
damp2
)
*
dampExp
;
#ifdef F1
real
factor
=
3
*
damp
*
dampExp
*
rr2
;
real
factor7
=
-
0.2
f
+
0.6
f
*
damp
;
ddsc3_0
=
factor
*
xr
;
ddsc5_0
=
ddsc3_0
*
damp
;
ddsc7_0
=
ddsc5_0
*
factor7
;
ddsc3_1
=
factor
*
yr
;
ddsc5_1
=
ddsc3_1
*
damp
;
ddsc7_1
=
ddsc5_1
*
factor7
;
ddsc3_2
=
factor
*
zr
;
ddsc5_2
=
ddsc3_2
*
damp
;
ddsc7_2
=
ddsc5_2
*
factor7
;
#endif
}
#if defined F1
real
scale3i
=
rr3
*
scale3
*
uScale
;
real
scale5i
=
rr5
*
scale5
*
uScale
;
#endif
real
dsc3
=
rr3
*
scale3
*
dScale
;
real
psc3
=
rr3
*
scale3
*
pScale
;
real
dsc5
=
rr5
*
scale5
*
dScale
;
real
psc5
=
rr5
*
scale5
*
pScale
;
real
dsc7
=
rr7
*
scale7
*
dScale
;
real
psc7
=
rr7
*
scale7
*
pScale
;
real
atom2quadrupoleZZ
=
1
-
atom2
.
quadrupoleXX
-
atom2
.
quadrupoleYY
;
real
qJr_0
=
atom2
.
quadrupoleXX
*
xr
+
atom2
.
quadrupoleXY
*
yr
+
atom2
.
quadrupoleXZ
*
zr
;
real
qJr_1
=
atom2
.
quadrupoleXY
*
xr
+
atom2
.
quadrupoleYY
*
yr
+
atom2
.
quadrupoleYZ
*
zr
;
real
qJr_2
=
atom2
.
quadrupoleXZ
*
xr
+
atom2
.
quadrupoleYZ
*
yr
+
atom2quadrupoleZZ
*
zr
;
real
atom1quadrupoleZZ
=
1
-
atom1
.
quadrupoleXX
-
atom1
.
quadrupoleYY
;
real
qIr_0
=
atom1
.
quadrupoleXX
*
xr
+
atom1
.
quadrupoleXY
*
yr
+
atom1
.
quadrupoleXZ
*
zr
;
real
qIr_1
=
atom1
.
quadrupoleXY
*
xr
+
atom1
.
quadrupoleYY
*
yr
+
atom1
.
quadrupoleYZ
*
zr
;
real
qIr_2
=
atom1
.
quadrupoleXZ
*
xr
+
atom1
.
quadrupoleYZ
*
yr
+
atom1quadrupoleZZ
*
zr
;
#if defined F1
real
sc2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
dipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
dipole
.
z
*
atom2
.
dipole
.
z
;
#endif
#if defined F1 || defined T1
real
sc4
=
atom2
.
dipole
.
x
*
xr
+
atom2
.
dipole
.
y
*
yr
+
atom2
.
dipole
.
z
*
zr
;
real
sc6
=
qJr_0
*
xr
+
qJr_1
*
yr
+
qJr_2
*
zr
;
#endif
#if defined F1 || defined T3
real
sc3
=
atom1
.
dipole
.
x
*
xr
+
atom1
.
dipole
.
y
*
yr
+
atom1
.
dipole
.
z
*
zr
;
real
sc5
=
qIr_0
*
xr
+
qIr_1
*
yr
+
qIr_2
*
zr
;
#endif
#if defined F1
real
sc7
=
qIr_0
*
atom2
.
dipole
.
x
+
qIr_1
*
atom2
.
dipole
.
y
+
qIr_2
*
atom2
.
dipole
.
z
;
real
sc8
=
qJr_0
*
atom1
.
dipole
.
x
+
qJr_1
*
atom1
.
dipole
.
y
+
qJr_2
*
atom1
.
dipole
.
z
;
real
sc9
=
qIr_0
*
qJr_0
+
qIr_1
*
qJr_1
+
qIr_2
*
qJr_2
;
real
sc10
=
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXX
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleYY
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYZ
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYZ
+
atom1quadrupoleZZ
*
atom2quadrupoleZZ
;
real
sci1
=
atom1
.
inducedDipole
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
dipole
.
z
;
#endif
#if defined F1 || defined T3
real
sci3
=
atom1
.
inducedDipole
.
x
*
xr
+
atom1
.
inducedDipole
.
y
*
yr
+
atom1
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1
real
sci7
=
qIr_0
*
atom2
.
inducedDipole
.
x
+
qIr_1
*
atom2
.
inducedDipole
.
y
+
qIr_2
*
atom2
.
inducedDipole
.
z
;
real
sci8
=
qJr_0
*
atom1
.
inducedDipole
.
x
+
qJr_1
*
atom1
.
inducedDipole
.
y
+
qJr_2
*
atom1
.
inducedDipole
.
z
;
#endif
#if defined F1 || defined T1
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
#endif
#if defined F1
real
scip1
=
atom1
.
inducedDipolePolar
.
x
*
atom2
.
dipole
.
x
+
atom1
.
inducedDipolePolar
.
y
*
atom2
.
dipole
.
y
+
atom1
.
inducedDipolePolar
.
z
*
atom2
.
dipole
.
z
+
atom2
.
inducedDipolePolar
.
x
*
atom1
.
dipole
.
x
+
atom2
.
inducedDipolePolar
.
y
*
atom1
.
dipole
.
y
+
atom2
.
inducedDipolePolar
.
z
*
atom1
.
dipole
.
z
;
real
scip2
=
atom1
.
inducedDipole
.
x
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
inducedDipole
.
y
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
inducedDipole
.
z
*
atom2
.
inducedDipolePolar
.
z
+
atom2
.
inducedDipole
.
x
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
inducedDipole
.
y
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
inducedDipole
.
z
*
atom1
.
inducedDipolePolar
.
z
;
#endif
#if defined F1 || defined T3
real
scip3
=
((
atom1
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom1
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom1
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#if defined F1 || defined T1
real
scip4
=
((
atom2
.
inducedDipolePolar
.
x
)
*
(
xr
)
+
(
atom2
.
inducedDipolePolar
.
y
)
*
(
yr
)
+
(
atom2
.
inducedDipolePolar
.
z
)
*
(
zr
));
#endif
#ifdef F1
real
scip7
=
((
qIr_0
)
*
(
atom2
.
inducedDipolePolar
.
x
)
+
(
qIr_1
)
*
(
atom2
.
inducedDipolePolar
.
y
)
+
(
qIr_2
)
*
(
atom2
.
inducedDipolePolar
.
z
));
real
scip8
=
((
qJr_0
)
*
(
atom1
.
inducedDipolePolar
.
x
)
+
(
qJr_1
)
*
(
atom1
.
inducedDipolePolar
.
y
)
+
(
qJr_2
)
*
(
atom1
.
inducedDipolePolar
.
z
));
real
gli1
=
atom2
.
posq
.
w
*
sci3
-
atom1
.
posq
.
w
*
sci4
;
real
gli6
=
sci1
;
real
glip1
=
atom2
.
posq
.
w
*
scip3
-
atom1
.
posq
.
w
*
scip4
;
real
glip6
=
scip1
;
real
gli2
=
-
sc3
*
sci4
-
sci3
*
sc4
;
real
gli3
=
sci3
*
sc6
-
sci4
*
sc5
;
real
gli7
=
2
*
(
sci7
-
sci8
);
real
glip2
=
-
sc3
*
scip4
-
scip3
*
sc4
;
real
glip3
=
scip3
*
sc6
-
scip4
*
sc5
;
real
glip7
=
2
*
(
scip7
-
scip8
);
real
factor3
=
rr3
*
((
gli1
+
gli6
)
*
pScale
+
(
glip1
+
glip6
)
*
dScale
);
real
factor5
=
rr5
*
((
gli2
+
gli7
)
*
pScale
+
(
glip2
+
glip7
)
*
dScale
);
real
factor7
=
rr7
*
(
gli3
*
pScale
+
glip3
*
dScale
);
real
ftm2i_0
=
-
0.5
f
*
(
factor3
*
ddsc3_0
+
factor5
*
ddsc5_0
+
factor7
*
ddsc7_0
);
real
ftm2i_1
=
-
0.5
f
*
(
factor3
*
ddsc3_1
+
factor5
*
ddsc5_1
+
factor7
*
ddsc7_1
);
real
ftm2i_2
=
-
0.5
f
*
(
factor3
*
ddsc3_2
+
factor5
*
ddsc5_2
+
factor7
*
ddsc7_2
);
real
gl0
=
atom1
.
posq
.
w
*
atom2
.
posq
.
w
;
real
gl1
=
atom2
.
posq
.
w
*
sc3
-
atom1
.
posq
.
w
*
sc4
;
real
gl2
=
atom1
.
posq
.
w
*
sc6
+
atom2
.
posq
.
w
*
sc5
-
sc3
*
sc4
;
real
gl3
=
sc3
*
sc6
-
sc4
*
sc5
;
real
gl4
=
sc5
*
sc6
;
real
gl6
=
sc2
;
real
gl7
=
2
*
(
sc7
-
sc8
);
real
gl8
=
2
*
sc10
;
real
gl5
=
-
4
*
sc9
;
real
gf1
=
rr3
*
gl0
+
rr5
*
(
gl1
+
gl6
)
+
rr7
*
(
gl2
+
gl7
+
gl8
)
+
rr9
*
(
gl3
+
gl5
)
+
rr11
*
gl4
;
#endif
#if defined F1 || defined T1
real
gf2
=
-
atom2
.
posq
.
w
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
;
real
gf5
=
2
*
(
-
atom2
.
posq
.
w
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
);
#endif
#if defined F1 || defined T3
real
gf3
=
atom1
.
posq
.
w
*
rr3
+
sc3
*
rr5
+
sc5
*
rr7
;
real
gf6
=
2
*
(
-
atom1
.
posq
.
w
*
rr5
-
sc3
*
rr7
-
sc5
*
rr9
);
#endif
#ifdef F1
real
em
=
mScale
*
(
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
+
rr5
*
(
gl2
+
gl7
+
gl8
)
+
rr7
*
(
gl3
+
gl5
)
+
rr9
*
gl4
);
real
ei
=
0.5
f
*
((
gli1
+
gli6
)
*
psc3
+
(
gli2
+
gli7
)
*
psc5
+
gli3
*
psc7
);
energy
=
em
+
ei
;
#endif
#if defined F1 || defined T1
real
qIdJ_0
=
atom1
.
quadrupoleXX
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
dipole
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
dipole
.
z
;
real
qIdJ_1
=
atom1
.
quadrupoleXY
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
dipole
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
dipole
.
z
;
real
qIdJ_2
=
atom1
.
quadrupoleXZ
*
atom2
.
dipole
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
dipole
.
y
+
atom1quadrupoleZZ
*
atom2
.
dipole
.
z
;
real
qIqJr_0
=
atom1
.
quadrupoleXX
*
qJr_0
+
atom1
.
quadrupoleXY
*
qJr_1
+
atom1
.
quadrupoleXZ
*
qJr_2
;
real
qIqJr_1
=
atom1
.
quadrupoleXY
*
qJr_0
+
atom1
.
quadrupoleYY
*
qJr_1
+
atom1
.
quadrupoleYZ
*
qJr_2
;
real
qIqJr_2
=
atom1
.
quadrupoleXZ
*
qJr_0
+
atom1
.
quadrupoleYZ
*
qJr_1
+
atom1quadrupoleZZ
*
qJr_2
;
#endif
#ifdef F1
real
qkqir_0
=
atom2
.
quadrupoleXX
*
qIr_0
+
atom2
.
quadrupoleXY
*
qIr_1
+
atom2
.
quadrupoleXZ
*
qIr_2
;
real
qkqir_1
=
atom2
.
quadrupoleXY
*
qIr_0
+
atom2
.
quadrupoleYY
*
qIr_1
+
atom2
.
quadrupoleYZ
*
qIr_2
;
real
qkqir_2
=
atom2
.
quadrupoleXZ
*
qIr_0
+
atom2
.
quadrupoleYZ
*
qIr_1
+
atom2quadrupoleZZ
*
qIr_2
;
real
qkdi_0
=
atom2
.
quadrupoleXX
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
z
;
real
qkdi_1
=
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
z
;
real
qkdi_2
=
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
dipole
.
z
;
real
ftm2_0
=
mScale
*
(
gf1
*
xr
+
gf2
*
atom1
.
dipole
.
x
+
gf3
*
atom2
.
dipole
.
x
+
2
*
rr5
*
(
qkdi_0
-
qIdJ_0
)
+
gf5
*
qIr_0
+
gf6
*
qJr_0
+
4
*
rr7
*
(
qIqJr_0
+
qkqir_0
));
real
ftm2_1
=
mScale
*
(
gf1
*
yr
+
gf2
*
atom1
.
dipole
.
y
+
gf3
*
atom2
.
dipole
.
y
+
2
*
rr5
*
(
qkdi_1
-
qIdJ_1
)
+
gf5
*
qIr_1
+
gf6
*
qJr_1
+
4
*
rr7
*
(
qIqJr_1
+
qkqir_1
));
real
ftm2_2
=
mScale
*
(
gf1
*
zr
+
gf2
*
atom1
.
dipole
.
z
+
gf3
*
atom2
.
dipole
.
z
+
2
*
rr5
*
(
qkdi_2
-
qIdJ_2
)
+
gf5
*
qIr_2
+
gf6
*
qJr_2
+
4
*
rr7
*
(
qIqJr_2
+
qkqir_2
));
real
gfi1
=
rr2
*
(
1.5
f
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
scale3i
)
+
2.5
f
*
((
gli7
+
gli2
)
*
psc5
+
(
glip7
+
glip2
)
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
scale5i
)
+
3.5
f
*
(
gli3
*
psc7
+
glip3
*
dsc7
));
ftm2i_0
+=
gfi1
*
xr
;
ftm2i_1
+=
gfi1
*
yr
;
ftm2i_2
+=
gfi1
*
zr
;
#endif
#if defined F1 || defined T1
real
gfi5
=
(
sci4
*
psc7
+
scip4
*
dsc7
);
#endif
#if defined F1 || defined T3
real
gfi6
=
-
(
sci3
*
psc7
+
scip3
*
dsc7
);
#endif
#if defined F1 || defined T1
real
qIuJ_0
=
atom1
.
quadrupoleXX
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
inducedDipole
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJ_1
=
atom1
.
quadrupoleXY
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
inducedDipole
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJ_2
=
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipole
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipole
.
y
+
atom1quadrupoleZZ
*
atom2
.
inducedDipole
.
z
;
real
qIuJp_0
=
atom1
.
quadrupoleXX
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleXY
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipolePolar
.
z
;
real
qIuJp_1
=
atom1
.
quadrupoleXY
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleYY
*
atom2
.
inducedDipolePolar
.
y
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipolePolar
.
z
;
real
qIuJp_2
=
atom1
.
quadrupoleXZ
*
atom2
.
inducedDipolePolar
.
x
+
atom1
.
quadrupoleYZ
*
atom2
.
inducedDipolePolar
.
y
+
atom1quadrupoleZZ
*
atom2
.
inducedDipolePolar
.
z
;
#endif
#if defined T3
real
qJuIp_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuIp_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuIp_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qJuI_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
z
;
real
qJuI_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
z
;
real
qJuI_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipole
.
z
;
#endif
#ifdef F1
real
qkui_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
z
;
real
qkui_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
z
;
real
qkui_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipole
.
z
;
real
qkuip_0
=
atom2
.
quadrupoleXX
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qkuip_1
=
atom2
.
quadrupoleXY
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
inducedDipolePolar
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
z
;
real
qkuip_2
=
atom2
.
quadrupoleXZ
*
atom1
.
inducedDipolePolar
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
inducedDipolePolar
.
y
+
atom2quadrupoleZZ
*
atom1
.
inducedDipolePolar
.
z
;
ftm2i_0
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
x
*
psc3
+
atom1
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
x
*
psc5
+
atom1
.
inducedDipolePolar
.
x
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
x
*
psc7
+
atom1
.
inducedDipolePolar
.
x
*
dsc7
))
+
0.5
f
*
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
x
*
psc3
+
atom2
.
inducedDipolePolar
.
x
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
x
*
psc5
+
atom2
.
inducedDipolePolar
.
x
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
x
*
psc7
+
atom2
.
inducedDipolePolar
.
x
*
dsc7
))
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
x
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
x
+
((
qkui_0
-
qIuJ_0
)
*
psc5
+
(
qkuip_0
-
qIuJp_0
)
*
dsc5
)
+
gfi5
*
qIr_0
+
gfi6
*
qJr_0
;
ftm2i_1
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
y
*
psc3
+
atom1
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
y
*
psc5
+
atom1
.
inducedDipolePolar
.
y
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
y
*
psc7
+
atom1
.
inducedDipolePolar
.
y
*
dsc7
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
y
*
psc3
+
atom2
.
inducedDipolePolar
.
y
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
y
*
psc5
+
atom2
.
inducedDipolePolar
.
y
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
y
*
psc7
+
atom2
.
inducedDipolePolar
.
y
*
dsc7
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
y
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
y
+
((
qkui_1
-
qIuJ_1
)
*
psc5
+
(
qkuip_1
-
qIuJp_1
)
*
dsc5
)
+
gfi5
*
qIr_1
+
gfi6
*
qJr_1
;
ftm2i_2
+=
0.5
f
*
(
-
atom2
.
posq
.
w
*
(
atom1
.
inducedDipole
.
z
*
psc3
+
atom1
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc4
*
(
atom1
.
inducedDipole
.
z
*
psc5
+
atom1
.
inducedDipolePolar
.
z
*
dsc5
)
-
sc6
*
(
atom1
.
inducedDipole
.
z
*
psc7
+
atom1
.
inducedDipolePolar
.
z
*
dsc7
))
+
(
atom1
.
posq
.
w
*
(
atom2
.
inducedDipole
.
z
*
psc3
+
atom2
.
inducedDipolePolar
.
z
*
dsc3
)
+
sc3
*
(
atom2
.
inducedDipole
.
z
*
psc5
+
atom2
.
inducedDipolePolar
.
z
*
dsc5
)
+
sc5
*
(
atom2
.
inducedDipole
.
z
*
psc7
+
atom2
.
inducedDipolePolar
.
z
*
dsc7
))
*
0.5
f
+
scale5i
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
)
*
0.5
f
+
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
*
atom1
.
dipole
.
z
+
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
*
atom2
.
dipole
.
z
+
((
qkui_2
-
qIuJ_2
)
*
psc5
+
(
qkuip_2
-
qIuJp_2
)
*
dsc5
)
+
gfi5
*
qIr_2
+
gfi6
*
qJr_2
;
#ifdef DIRECT_POLARIZATION
real
gfd
=
0.5
*
(
3
*
rr2
*
scip2
*
scale3i
-
5
*
rr2
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
scale5i
);
real
temp5
=
0.5
*
scale5i
;
real
fdir_0
=
gfd
*
xr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
+
sci3
*
atom2
.
inducedDipolePolar
.
x
+
scip3
*
atom2
.
inducedDipole
.
x
);
real
fdir_1
=
gfd
*
yr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
+
sci3
*
atom2
.
inducedDipolePolar
.
y
+
scip3
*
atom2
.
inducedDipole
.
y
);
real
fdir_2
=
gfd
*
zr
+
temp5
*
(
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
+
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
ftm2i_0
-=
fdir_0
;
ftm2i_1
-=
fdir_1
;
ftm2i_2
-=
fdir_2
;
#else
real
scaleF
=
0.5
f
*
uScale
;
real
inducedFactor3
=
scip2
*
rr3
*
scaleF
;
real
inducedFactor5
=
(
sci3
*
scip4
+
scip3
*
sci4
)
*
rr5
*
scaleF
;
real
findmp_0
=
inducedFactor3
*
ddsc3_0
-
inducedFactor5
*
ddsc5_0
;
real
findmp_1
=
inducedFactor3
*
ddsc3_1
-
inducedFactor5
*
ddsc5_1
;
real
findmp_2
=
inducedFactor3
*
ddsc3_2
-
inducedFactor5
*
ddsc5_2
;
ftm2i_0
-=
findmp_0
;
ftm2i_1
-=
findmp_1
;
ftm2i_2
-=
findmp_2
;
#endif
#endif
#if defined T1
real
gti2
=
0.5
f
*
(
sci4
*
psc5
+
scip4
*
dsc5
);
real
gti5
=
gfi5
;
#endif
#if defined T3
real
gti3
=
0.5
f
*
(
sci3
*
psc5
+
scip3
*
dsc5
);
real
gti6
=
gfi6
;
#endif
#if defined T1 || defined T3
real
dixdk_0
=
atom1
.
dipole
.
y
*
atom2
.
dipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
dipole
.
y
;
real
dixdk_1
=
atom1
.
dipole
.
z
*
atom2
.
dipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
dipole
.
z
;
real
dixdk_2
=
atom1
.
dipole
.
x
*
atom2
.
dipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
dipole
.
x
;
#if defined T1
real
dixuk_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
y
;
real
dixuk_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipole
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
z
;
real
dixuk_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipole
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipole
.
x
;
#endif
#endif
#ifdef T1
real
dixukp_0
=
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
z
-
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
y
;
real
dixukp_1
=
atom1
.
dipole
.
z
*
atom2
.
inducedDipolePolar
.
x
-
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
z
;
real
dixukp_2
=
atom1
.
dipole
.
x
*
atom2
.
inducedDipolePolar
.
y
-
atom1
.
dipole
.
y
*
atom2
.
inducedDipolePolar
.
x
;
#endif
#ifdef T1
real
dixr_0
=
atom1
.
dipole
.
y
*
zr
-
atom1
.
dipole
.
z
*
yr
;
real
dixr_1
=
atom1
.
dipole
.
z
*
xr
-
atom1
.
dipole
.
x
*
zr
;
real
dixr_2
=
atom1
.
dipole
.
x
*
yr
-
atom1
.
dipole
.
y
*
xr
;
#endif
#ifdef T1
real
rxqiukp_0
=
yr
*
qIuJp_2
-
zr
*
qIuJp_1
;
real
rxqiukp_1
=
zr
*
qIuJp_0
-
xr
*
qIuJp_2
;
real
rxqiukp_2
=
xr
*
qIuJp_1
-
yr
*
qIuJp_0
;
real
rxqir_0
=
yr
*
qIr_2
-
zr
*
qIr_1
;
real
rxqir_1
=
zr
*
qIr_0
-
xr
*
qIr_2
;
real
rxqir_2
=
xr
*
qIr_1
-
yr
*
qIr_0
;
real
rxqiuk_0
=
yr
*
qIuJ_2
-
zr
*
qIuJ_1
;
real
rxqiuk_1
=
zr
*
qIuJ_0
-
xr
*
qIuJ_2
;
real
rxqiuk_2
=
xr
*
qIuJ_1
-
yr
*
qIuJ_0
;
real
ukxqir_0
=
atom2
.
inducedDipole
.
y
*
qIr_2
-
atom2
.
inducedDipole
.
z
*
qIr_1
;
real
ukxqir_1
=
atom2
.
inducedDipole
.
z
*
qIr_0
-
atom2
.
inducedDipole
.
x
*
qIr_2
;
real
ukxqir_2
=
atom2
.
inducedDipole
.
x
*
qIr_1
-
atom2
.
inducedDipole
.
y
*
qIr_0
;
real
ukxqirp_0
=
atom2
.
inducedDipolePolar
.
y
*
qIr_2
-
atom2
.
inducedDipolePolar
.
z
*
qIr_1
;
real
ukxqirp_1
=
atom2
.
inducedDipolePolar
.
z
*
qIr_0
-
atom2
.
inducedDipolePolar
.
x
*
qIr_2
;
real
ukxqirp_2
=
atom2
.
inducedDipolePolar
.
x
*
qIr_1
-
atom2
.
inducedDipolePolar
.
y
*
qIr_0
;
real
dixqkr_0
=
atom1
.
dipole
.
y
*
qJr_2
-
atom1
.
dipole
.
z
*
qJr_1
;
real
dixqkr_1
=
atom1
.
dipole
.
z
*
qJr_0
-
atom1
.
dipole
.
x
*
qJr_2
;
real
dixqkr_2
=
atom1
.
dipole
.
x
*
qJr_1
-
atom1
.
dipole
.
y
*
qJr_0
;
real
dkxqir_0
=
atom2
.
dipole
.
y
*
qIr_2
-
atom2
.
dipole
.
z
*
qIr_1
;
real
dkxqir_1
=
atom2
.
dipole
.
z
*
qIr_0
-
atom2
.
dipole
.
x
*
qIr_2
;
real
dkxqir_2
=
atom2
.
dipole
.
x
*
qIr_1
-
atom2
.
dipole
.
y
*
qIr_0
;
real
rxqikr_0
=
yr
*
qIqJr_2
-
zr
*
qIqJr_1
;
real
rxqikr_1
=
zr
*
qIqJr_0
-
xr
*
qIqJr_2
;
real
rxqikr_2
=
xr
*
qIqJr_1
-
yr
*
qIqJr_0
;
real
rxqidk_0
=
yr
*
qIdJ_2
-
zr
*
qIdJ_1
;
real
rxqidk_1
=
zr
*
qIdJ_0
-
xr
*
qIdJ_2
;
real
rxqidk_2
=
xr
*
qIdJ_1
-
yr
*
qIdJ_0
;
real
qkrxqir_0
=
qJr_1
*
qIr_2
-
qJr_2
*
qIr_1
;
real
qkrxqir_1
=
qJr_2
*
qIr_0
-
qJr_0
*
qIr_2
;
real
qkrxqir_2
=
qJr_0
*
qIr_1
-
qJr_1
*
qIr_0
;
#endif
#if defined T1 || defined T3
real
qixqk_0
=
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXZ
+
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleYZ
+
atom1
.
quadrupoleYZ
*
atom2quadrupoleZZ
-
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXY
-
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleYY
-
atom1quadrupoleZZ
*
atom2
.
quadrupoleYZ
;
real
qixqk_1
=
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleXX
+
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleXY
+
atom1quadrupoleZZ
*
atom2
.
quadrupoleXZ
-
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXZ
-
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleYZ
-
atom1
.
quadrupoleXZ
*
atom2quadrupoleZZ
;
real
qixqk_2
=
atom1
.
quadrupoleXX
*
atom2
.
quadrupoleXY
+
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleYY
+
atom1
.
quadrupoleXZ
*
atom2
.
quadrupoleYZ
-
atom1
.
quadrupoleXY
*
atom2
.
quadrupoleXX
-
atom1
.
quadrupoleYY
*
atom2
.
quadrupoleXY
-
atom1
.
quadrupoleYZ
*
atom2
.
quadrupoleXZ
;
#endif
#ifdef T1
real
ttm2_0
=
-
rr3
*
dixdk_0
+
gf2
*
dixr_0
-
gf5
*
rxqir_0
+
2
*
rr5
*
(
dixqkr_0
+
dkxqir_0
+
rxqidk_0
-
2
*
qixqk_0
)
-
4
*
rr7
*
(
rxqikr_0
+
qkrxqir_0
);
real
ttm2_1
=
-
rr3
*
dixdk_1
+
gf2
*
dixr_1
-
gf5
*
rxqir_1
+
2
*
rr5
*
(
dixqkr_1
+
dkxqir_1
+
rxqidk_1
-
2
*
qixqk_1
)
-
4
*
rr7
*
(
rxqikr_1
+
qkrxqir_1
);
real
ttm2_2
=
-
rr3
*
dixdk_2
+
gf2
*
dixr_2
-
gf5
*
rxqir_2
+
2
*
rr5
*
(
dixqkr_2
+
dkxqir_2
+
rxqidk_2
-
2
*
qixqk_2
)
-
4
*
rr7
*
(
rxqikr_2
+
qkrxqir_2
);
real
ttm2i_0
=
-
(
dixuk_0
*
psc3
+
dixukp_0
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_0
+
((
ukxqir_0
+
rxqiuk_0
)
*
psc5
+
(
ukxqirp_0
+
rxqiukp_0
)
*
dsc5
)
-
gti5
*
rxqir_0
;
real
ttm2i_1
=
-
(
dixuk_1
*
psc3
+
dixukp_1
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_1
+
((
ukxqir_1
+
rxqiuk_1
)
*
psc5
+
(
ukxqirp_1
+
rxqiukp_1
)
*
dsc5
)
-
gti5
*
rxqir_1
;
real
ttm2i_2
=
-
(
dixuk_2
*
psc3
+
dixukp_2
*
dsc3
)
*
0.5
f
+
gti2
*
dixr_2
+
((
ukxqir_2
+
rxqiuk_2
)
*
psc5
+
(
ukxqirp_2
+
rxqiukp_2
)
*
dsc5
)
-
gti5
*
rxqir_2
;
#endif
#ifdef T3
real
qJqIr_0
=
atom2
.
quadrupoleXX
*
qIr_0
+
atom2
.
quadrupoleXY
*
qIr_1
+
atom2
.
quadrupoleXZ
*
qIr_2
;
real
qJqIr_1
=
atom2
.
quadrupoleXY
*
qIr_0
+
atom2
.
quadrupoleYY
*
qIr_1
+
atom2
.
quadrupoleYZ
*
qIr_2
;
real
qJqIr_2
=
atom2
.
quadrupoleXZ
*
qIr_0
+
atom2
.
quadrupoleYZ
*
qIr_1
+
atom2quadrupoleZZ
*
qIr_2
;
real
qJdI_0
=
atom2
.
quadrupoleXX
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
z
;
real
qJdI_1
=
atom2
.
quadrupoleXY
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYY
*
atom1
.
dipole
.
y
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
z
;
real
qJdI_2
=
atom2
.
quadrupoleXZ
*
atom1
.
dipole
.
x
+
atom2
.
quadrupoleYZ
*
atom1
.
dipole
.
y
+
atom2quadrupoleZZ
*
atom1
.
dipole
.
z
;
real
dkxr_0
=
atom2
.
dipole
.
y
*
zr
-
atom2
.
dipole
.
z
*
yr
;
real
dkxr_1
=
atom2
.
dipole
.
z
*
xr
-
atom2
.
dipole
.
x
*
zr
;
real
dkxr_2
=
atom2
.
dipole
.
x
*
yr
-
atom2
.
dipole
.
y
*
xr
;
real
rxqkr_0
=
yr
*
qJr_2
-
zr
*
qJr_1
;
real
rxqkr_1
=
zr
*
qJr_0
-
xr
*
qJr_2
;
real
rxqkr_2
=
xr
*
qJr_1
-
yr
*
qJr_0
;
real
dixqkr_0
=
atom1
.
dipole
.
y
*
qJr_2
-
atom1
.
dipole
.
z
*
qJr_1
;
real
dixqkr_1
=
atom1
.
dipole
.
z
*
qJr_0
-
atom1
.
dipole
.
x
*
qJr_2
;
real
dixqkr_2
=
atom1
.
dipole
.
x
*
qJr_1
-
atom1
.
dipole
.
y
*
qJr_0
;
real
dkxqir_0
=
atom2
.
dipole
.
y
*
qIr_2
-
atom2
.
dipole
.
z
*
qIr_1
;
real
dkxqir_1
=
atom2
.
dipole
.
z
*
qIr_0
-
atom2
.
dipole
.
x
*
qIr_2
;
real
dkxqir_2
=
atom2
.
dipole
.
x
*
qIr_1
-
atom2
.
dipole
.
y
*
qIr_0
;
real
rxqkdi_0
=
yr
*
qJdI_2
-
zr
*
qJdI_1
;
real
rxqkdi_1
=
zr
*
qJdI_0
-
xr
*
qJdI_2
;
real
rxqkdi_2
=
xr
*
qJdI_1
-
yr
*
qJdI_0
;
real
rxqkir_0
=
yr
*
qJqIr_2
-
zr
*
qJqIr_1
;
real
rxqkir_1
=
zr
*
qJqIr_0
-
xr
*
qJqIr_2
;
real
rxqkir_2
=
xr
*
qJqIr_1
-
yr
*
qJqIr_0
;
real
qkrxqir_0
=
qJr_1
*
qIr_2
-
qJr_2
*
qIr_1
;
real
qkrxqir_1
=
qJr_2
*
qIr_0
-
qJr_0
*
qIr_2
;
real
qkrxqir_2
=
qJr_0
*
qIr_1
-
qJr_1
*
qIr_0
;
real
dkxui_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
y
;
real
dkxui_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipole
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
z
;
real
dkxui_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipole
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipole
.
x
;
real
dkxuip_0
=
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
z
-
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
y
;
real
dkxuip_1
=
atom2
.
dipole
.
z
*
atom1
.
inducedDipolePolar
.
x
-
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
z
;
real
dkxuip_2
=
atom2
.
dipole
.
x
*
atom1
.
inducedDipolePolar
.
y
-
atom2
.
dipole
.
y
*
atom1
.
inducedDipolePolar
.
x
;
real
uixqkrp_0
=
atom1
.
inducedDipolePolar
.
y
*
qJr_2
-
atom1
.
inducedDipolePolar
.
z
*
qJr_1
;
real
uixqkrp_1
=
atom1
.
inducedDipolePolar
.
z
*
qJr_0
-
atom1
.
inducedDipolePolar
.
x
*
qJr_2
;
real
uixqkrp_2
=
atom1
.
inducedDipolePolar
.
x
*
qJr_1
-
atom1
.
inducedDipolePolar
.
y
*
qJr_0
;
real
uixqkr_0
=
atom1
.
inducedDipole
.
y
*
qJr_2
-
atom1
.
inducedDipole
.
z
*
qJr_1
;
real
uixqkr_1
=
atom1
.
inducedDipole
.
z
*
qJr_0
-
atom1
.
inducedDipole
.
x
*
qJr_2
;
real
uixqkr_2
=
atom1
.
inducedDipole
.
x
*
qJr_1
-
atom1
.
inducedDipole
.
y
*
qJr_0
;
real
rxqkuip_0
=
yr
*
qJuIp_2
-
zr
*
qJuIp_1
;
real
rxqkuip_1
=
zr
*
qJuIp_0
-
xr
*
qJuIp_2
;
real
rxqkuip_2
=
xr
*
qJuIp_1
-
yr
*
qJuIp_0
;
real
rxqkui_0
=
yr
*
qJuI_2
-
zr
*
qJuI_1
;
real
rxqkui_1
=
zr
*
qJuI_0
-
xr
*
qJuI_2
;
real
rxqkui_2
=
xr
*
qJuI_1
-
yr
*
qJuI_0
;
real
ttm3_0
=
rr3
*
dixdk_0
+
gf3
*
dkxr_0
-
gf6
*
rxqkr_0
-
2
*
rr5
*
(
dixqkr_0
+
dkxqir_0
+
rxqkdi_0
-
2
*
qixqk_0
)
-
4
*
rr7
*
(
rxqkir_0
-
qkrxqir_0
);
real
ttm3_1
=
rr3
*
dixdk_1
+
gf3
*
dkxr_1
-
gf6
*
rxqkr_1
-
2
*
rr5
*
(
dixqkr_1
+
dkxqir_1
+
rxqkdi_1
-
2
*
qixqk_1
)
-
4
*
rr7
*
(
rxqkir_1
-
qkrxqir_1
);
real
ttm3_2
=
rr3
*
dixdk_2
+
gf3
*
dkxr_2
-
gf6
*
rxqkr_2
-
2
*
rr5
*
(
dixqkr_2
+
dkxqir_2
+
rxqkdi_2
-
2
*
qixqk_2
)
-
4
*
rr7
*
(
rxqkir_2
-
qkrxqir_2
);
real
ttm3i_0
=
-
(
dkxui_0
*
psc3
+
dkxuip_0
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_0
-
((
uixqkr_0
+
rxqkui_0
)
*
psc5
+
(
uixqkrp_0
+
rxqkuip_0
)
*
dsc5
)
-
gti6
*
rxqkr_0
;
real
ttm3i_1
=
-
(
dkxui_1
*
psc3
+
dkxuip_1
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_1
-
((
uixqkr_1
+
rxqkui_1
)
*
psc5
+
(
uixqkrp_1
+
rxqkuip_1
)
*
dsc5
)
-
gti6
*
rxqkr_1
;
real
ttm3i_2
=
-
(
dkxui_2
*
psc3
+
dkxuip_2
*
dsc3
)
*
0.5
f
+
gti3
*
dkxr_2
-
((
uixqkr_2
+
rxqkui_2
)
*
psc5
+
(
uixqkrp_2
+
rxqkuip_2
)
*
dsc5
)
-
gti6
*
rxqkr_2
;
#endif
if
(
mScale
<
1
)
{
#ifdef T1
ttm2_0
*=
mScale
;
ttm2_1
*=
mScale
;
ttm2_2
*=
mScale
;
#endif
#ifdef T3
ttm3_0
*=
mScale
;
ttm3_1
*=
mScale
;
ttm3_2
*=
mScale
;
#endif
}
#ifdef F1
outputForce
.
x
=
-
(
ftm2_0
+
ftm2i_0
);
outputForce
.
y
=
-
(
ftm2_1
+
ftm2i_1
);
outputForce
.
z
=
-
(
ftm2_2
+
ftm2i_2
);
#endif
#ifdef T1
outputForce
.
x
=
(
ttm2_0
+
ttm2i_0
);
outputForce
.
y
=
(
ttm2_1
+
ttm2i_1
);
outputForce
.
z
=
(
ttm2_2
+
ttm2i_2
);
#endif
#ifdef T3
outputForce
.
x
=
(
ttm3_0
+
ttm3i_0
);
outputForce
.
y
=
(
ttm3_1
+
ttm3i_1
);
outputForce
.
z
=
(
ttm3_2
+
ttm3i_2
);
#endif
}
plugins/amoeba/platforms/cuda2/src/kernels/multipoleElectrostatics.cu
0 → 100644
View file @
776f6f22
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real4
posq
;
real3
force
,
dipole
,
inducedDipole
,
inducedDipolePolar
;
real
quadrupoleXX
,
quadrupoleXY
,
quadrupoleXZ
;
real
quadrupoleYY
,
quadrupoleYZ
;
float
thole
,
damp
;
}
AtomData
;
__device__
void
computeOneInteractionF1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real
&
energy
,
real3
&
outputForce
);
__device__
void
computeOneInteractionT1
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
);
__device__
void
computeOneInteractionT3
(
AtomData
&
atom1
,
volatile
AtomData
&
atom2
,
float
dScale
,
float
pScale
,
float
mScale
,
real3
&
outputForce
);
inline
__device__
void
loadAtomData
(
AtomData
&
data
,
int
atom
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
data
.
posq
=
posq
[
atom
];
data
.
dipole
.
x
=
labFrameDipole
[
atom
*
3
];
data
.
dipole
.
y
=
labFrameDipole
[
atom
*
3
+
1
];
data
.
dipole
.
z
=
labFrameDipole
[
atom
*
3
+
2
];
data
.
quadrupoleXX
=
labFrameQuadrupole
[
atom
*
5
];
data
.
quadrupoleXY
=
labFrameQuadrupole
[
atom
*
5
+
1
];
data
.
quadrupoleXZ
=
labFrameQuadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrameQuadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrameQuadrupole
[
atom
*
5
+
4
];
data
.
inducedDipole
.
x
=
inducedDipole
[
atom
*
3
];
data
.
inducedDipole
.
y
=
inducedDipole
[
atom
*
3
+
1
];
data
.
inducedDipole
.
z
=
inducedDipole
[
atom
*
3
+
2
];
data
.
inducedDipolePolar
.
x
=
inducedDipolePolar
[
atom
*
3
];
data
.
inducedDipolePolar
.
y
=
inducedDipolePolar
[
atom
*
3
+
1
];
data
.
inducedDipolePolar
.
z
=
inducedDipolePolar
[
atom
*
3
+
2
];
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
}
__device__
real
computeDScaleFactor
(
unsigned
int
polarizationGroup
)
{
return
(
polarizationGroup
&
1
?
0
:
1
);
}
__device__
float
computeMScaleFactor
(
uint2
covalent
)
{
bool
x
=
(
covalent
.
x
&
1
);
bool
y
=
(
covalent
.
y
&
1
);
return
(
x
?
(
y
?
0.0
f
:
0.4
f
)
:
(
y
?
0.8
f
:
1.0
f
));
}
__device__
float
computePScaleFactor
(
uint2
covalent
,
unsigned
int
polarizationGroup
)
{
bool
x
=
(
covalent
.
x
&
1
);
bool
y
=
(
covalent
.
y
&
1
);
bool
p
=
(
polarizationGroup
&
1
);
return
(
x
&&
y
?
0.0
f
:
(
x
&&
p
?
0.5
f
:
1.0
f
));
}
/**
* Compute electrostatic interactions.
*/
extern
"C"
__global__
void
computeElectrostatics
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
ushort2
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
unsigned
int
*
__restrict__
interactionFlags
,
#endif
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
numTileIndices
/
totalWarps
:
warp
*
numTiles
/
totalWarps
);
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
numTiles
/
totalWarps
);
#else
const
unsigned
int
numTiles
=
numTileIndices
;
unsigned
int
pos
=
startTileIndex
+
warp
*
numTiles
/
totalWarps
;
unsigned
int
end
=
startTileIndex
+
(
warp
+
1
)
*
numTiles
/
totalWarps
;
#endif
real
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
unsigned
int
exclusionRange
[
2
*
WARPS_PER_GROUP
];
__shared__
int
exclusionIndex
[
WARPS_PER_GROUP
];
#ifndef ENABLE_SHUFFLE
__shared__
real
tempBuffer
[
3
*
THREAD_BLOCK_SIZE
];
#endif
do
{
// Extract the coordinates of this tile
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
const
unsigned
int
localGroupIndex
=
threadIdx
.
x
/
TILE_SIZE
;
unsigned
int
x
,
y
;
AtomData
data
;
if
(
pos
<
end
)
{
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
ushort2
tileIndices
=
tiles
[
pos
];
x
=
tileIndices
.
x
;
y
=
tileIndices
.
y
;
}
else
#endif
{
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
}
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
loadAtomData
(
data
,
atom1
,
posq
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
data
.
force
=
make_real3
(
0
);
// Locate the exclusion data for this tile.
if
(
tgx
<
2
)
exclusionRange
[
2
*
localGroupIndex
+
tgx
]
=
exclusionRowIndices
[
x
+
tgx
];
if
(
tgx
==
0
)
exclusionIndex
[
localGroupIndex
]
=
-
1
;
for
(
unsigned
int
i
=
exclusionRange
[
2
*
localGroupIndex
]
+
tgx
;
i
<
exclusionRange
[
2
*
localGroupIndex
+
1
];
i
+=
TILE_SIZE
)
if
(
exclusionIndices
[
i
]
==
y
)
exclusionIndex
[
localGroupIndex
]
=
i
*
TILE_SIZE
;
bool
hasExclusions
=
(
exclusionIndex
[
localGroupIndex
]
>
-
1
);
if
(
pos
>=
end
)
;
// This warp is done.
else
if
(
x
==
y
)
{
// This tile is on the diagonal.
localData
[
threadIdx
.
x
].
posq
=
data
.
posq
;
localData
[
threadIdx
.
x
].
dipole
=
data
.
dipole
;
localData
[
threadIdx
.
x
].
quadrupoleXX
=
data
.
quadrupoleXX
;
localData
[
threadIdx
.
x
].
quadrupoleXY
=
data
.
quadrupoleXY
;
localData
[
threadIdx
.
x
].
quadrupoleXZ
=
data
.
quadrupoleXZ
;
localData
[
threadIdx
.
x
].
quadrupoleYY
=
data
.
quadrupoleYY
;
localData
[
threadIdx
.
x
].
quadrupoleYZ
=
data
.
quadrupoleYZ
;
localData
[
threadIdx
.
x
].
inducedDipole
=
data
.
inducedDipole
;
localData
[
threadIdx
.
x
].
inducedDipolePolar
=
data
.
inducedDipolePolar
;
localData
[
threadIdx
.
x
].
thole
=
data
.
thole
;
// IS THIS CORRECT?
localData
[
threadIdx
.
x
].
damp
=
data
.
damp
;
// IS THIS CORRECT?
uint2
covalent
=
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
unsigned
int
polarizationGroup
=
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
// Compute forces.
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
real
tempEnergy
;
float
d
=
computeDScaleFactor
(
polarizationGroup
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
float
m
=
computeMScaleFactor
(
covalent
);
computeOneInteractionF1
(
data
,
localData
[
tbx
+
j
],
d
,
p
,
m
,
tempEnergy
,
tempForce
);
data
.
force
+=
tempForce
;
energy
+=
0.5
f
*
tempEnergy
;
}
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
// Compute torques.
data
.
force
=
make_real3
(
0
);
covalent
=
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
polarizationGroup
=
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
float
d
=
computeDScaleFactor
(
polarizationGroup
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
float
m
=
computeMScaleFactor
(
covalent
);
computeOneInteractionT1
(
data
,
localData
[
tbx
+
j
],
d
,
p
,
m
,
tempForce
);
data
.
force
+=
tempForce
;
}
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
}
else
{
// This is an off-diagonal tile.
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
loadAtomData
(
localData
[
threadIdx
.
x
],
j
,
posq
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
dampingAndThole
);
localData
[
threadIdx
.
x
].
force
=
make_real3
(
0
);
#ifdef USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags
[
pos
]
:
0xFFFFFFFF
);
if
(
!
hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
// No interactions in this tile.
}
else
{
// Compute only a subset of the interactions in this tile.
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
int
atom2
=
tbx
+
j
;
int
bufferIndex
=
3
*
threadIdx
.
x
;
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
j
;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
for
(
int
i
=
16
;
i
>=
1
;
i
/=
2
)
{
dEdR2
.
x
+=
__shfl_xor
(
dEdR2
.
x
,
i
,
32
);
dEdR2
.
y
+=
__shfl_xor
(
dEdR2
.
y
,
i
,
32
);
dEdR2
.
z
+=
__shfl_xor
(
dEdR2
.
z
,
i
,
32
);
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
j
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
j
].
fz
+=
dEdR2
.
z
;
}
#else
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
tempBuffer
[
bufferIndex
]
=
dEdR2
.
x
;
tempBuffer
[
bufferIndex
+
1
]
=
dEdR2
.
y
;
tempBuffer
[
bufferIndex
+
2
]
=
dEdR2
.
z
;
// Sum the forces on atom2.
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
bufferIndex
]
+=
tempBuffer
[
bufferIndex
+
3
]
+
tempBuffer
[
bufferIndex
+
6
]
+
tempBuffer
[
bufferIndex
+
9
];
tempBuffer
[
bufferIndex
+
1
]
+=
tempBuffer
[
bufferIndex
+
4
]
+
tempBuffer
[
bufferIndex
+
7
]
+
tempBuffer
[
bufferIndex
+
10
];
tempBuffer
[
bufferIndex
+
2
]
+=
tempBuffer
[
bufferIndex
+
5
]
+
tempBuffer
[
bufferIndex
+
8
]
+
tempBuffer
[
bufferIndex
+
11
];
}
if
(
tgx
==
0
)
{
localData
[
tbx
+
j
].
fx
+=
tempBuffer
[
bufferIndex
]
+
tempBuffer
[
bufferIndex
+
12
]
+
tempBuffer
[
bufferIndex
+
24
]
+
tempBuffer
[
bufferIndex
+
36
]
+
tempBuffer
[
bufferIndex
+
48
]
+
tempBuffer
[
bufferIndex
+
60
]
+
tempBuffer
[
bufferIndex
+
72
]
+
tempBuffer
[
bufferIndex
+
84
];
localData
[
tbx
+
j
].
fy
+=
tempBuffer
[
bufferIndex
+
1
]
+
tempBuffer
[
bufferIndex
+
13
]
+
tempBuffer
[
bufferIndex
+
25
]
+
tempBuffer
[
bufferIndex
+
37
]
+
tempBuffer
[
bufferIndex
+
49
]
+
tempBuffer
[
bufferIndex
+
61
]
+
tempBuffer
[
bufferIndex
+
73
]
+
tempBuffer
[
bufferIndex
+
85
];
localData
[
tbx
+
j
].
fz
+=
tempBuffer
[
bufferIndex
+
2
]
+
tempBuffer
[
bufferIndex
+
14
]
+
tempBuffer
[
bufferIndex
+
26
]
+
tempBuffer
[
bufferIndex
+
38
]
+
tempBuffer
[
bufferIndex
+
50
]
+
tempBuffer
[
bufferIndex
+
62
]
+
tempBuffer
[
bufferIndex
+
74
]
+
tempBuffer
[
bufferIndex
+
86
];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
uint2
covalent
=
(
hasExclusions
?
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
make_uint2
(
0
,
0
));
unsigned
int
polarizationGroup
=
(
hasExclusions
?
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0
);
covalent
.
x
=
(
covalent
.
x
>>
tgx
)
|
(
covalent
.
x
<<
(
TILE_SIZE
-
tgx
));
covalent
.
y
=
(
covalent
.
y
>>
tgx
)
|
(
covalent
.
y
<<
(
TILE_SIZE
-
tgx
));
polarizationGroup
=
(
polarizationGroup
>>
tgx
)
|
(
polarizationGroup
<<
(
TILE_SIZE
-
tgx
));
// Compute forces.
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
real
tempEnergy
;
float
d
=
computeDScaleFactor
(
polarizationGroup
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
float
m
=
computeMScaleFactor
(
covalent
);
computeOneInteractionF1
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempEnergy
,
tempForce
);
data
.
force
+=
tempForce
;
localData
[
tbx
+
tj
].
force
-=
tempForce
;
energy
+=
tempEnergy
;
}
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0xFFFFFFFF
)));
}
// Compute torques.
covalent
=
(
hasExclusions
?
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
make_uint2
(
0
,
0
));
polarizationGroup
=
(
hasExclusions
?
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0
);
covalent
.
x
=
(
covalent
.
x
>>
tgx
)
|
(
covalent
.
x
<<
(
TILE_SIZE
-
tgx
));
covalent
.
y
=
(
covalent
.
y
>>
tgx
)
|
(
covalent
.
y
<<
(
TILE_SIZE
-
tgx
));
polarizationGroup
=
(
polarizationGroup
>>
tgx
)
|
(
polarizationGroup
<<
(
TILE_SIZE
-
tgx
));
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
real3
tempForce
;
float
d
=
computeDScaleFactor
(
polarizationGroup
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
float
m
=
computeMScaleFactor
(
covalent
);
computeOneInteractionT1
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempForce
);
data
.
force
+=
tempForce
;
computeOneInteractionT3
(
data
,
localData
[
tbx
+
tj
],
d
,
p
,
m
,
tempForce
);
localData
[
tbx
+
tj
].
force
+=
tempForce
;
}
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
threadIdx
.
x
].
force
*=
ENERGY_SCALE_FACTOR
;
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
data
.
force
.
z
*
0xFFFFFFFF
)));
offset
=
y
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
x
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
y
*
0xFFFFFFFF
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
force
.
z
*
0xFFFFFFFF
)));
}
}
}
}
pos
++
;
}
while
(
pos
<
end
);
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
plugins/amoeba/platforms/cuda2/src/kernels/multipoleFixedField.cu
View file @
776f6f22
...
...
@@ -14,12 +14,12 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data
.
dipole
.
x
=
labFrameDipole
[
atom
*
3
];
data
.
dipole
.
y
=
labFrameDipole
[
atom
*
3
+
1
];
data
.
dipole
.
z
=
labFrameDipole
[
atom
*
3
+
2
];
data
.
quadrupoleXX
=
labFrameQuadrupole
[
atom
*
9
];
data
.
quadrupoleXY
=
labFrameQuadrupole
[
atom
*
9
+
1
];
data
.
quadrupoleXZ
=
labFrameQuadrupole
[
atom
*
9
+
2
];
data
.
quadrupoleYY
=
labFrameQuadrupole
[
atom
*
9
+
4
];
data
.
quadrupoleYZ
=
labFrameQuadrupole
[
atom
*
9
+
5
];
data
.
quadrupoleZZ
=
labFrameQuadrupole
[
atom
*
9
+
8
]
;
data
.
quadrupoleXX
=
labFrameQuadrupole
[
atom
*
5
];
data
.
quadrupoleXY
=
labFrameQuadrupole
[
atom
*
5
+
1
];
data
.
quadrupoleXZ
=
labFrameQuadrupole
[
atom
*
5
+
2
];
data
.
quadrupoleYY
=
labFrameQuadrupole
[
atom
*
5
+
3
];
data
.
quadrupoleYZ
=
labFrameQuadrupole
[
atom
*
5
+
4
];
data
.
quadrupoleZZ
=
1
-
data
.
quadrupoleXX
-
data
.
quadrupoleYY
;
float2
temp
=
dampingAndThole
[
atom
];
data
.
damp
=
temp
.
x
;
data
.
thole
=
temp
.
y
;
...
...
@@ -38,7 +38,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
float
damp
=
atom1
.
damp
*
atom2
.
damp
;
real
dampExp
;
if
(
damp
!=
0
&&
r
<
SCALING_DISTANCE_CUTOFF
)
{
if
(
damp
!=
0
)
{
// get scaling factors
...
...
@@ -83,7 +83,7 @@ __device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return
(
polarizationGroup
&
1
?
0
:
1
);
}
__device__
float
compute
D
ScaleFactor
(
uint2
covalent
)
{
__device__
float
compute
M
ScaleFactor
(
uint2
covalent
)
{
// int f1 = (value == 0 || value == 1 ? 1 : 0);
// int f2 = (value == 0 || value == 2 ? 1 : 0);
// 0 = 12 or 13: x and y: 0
...
...
@@ -106,7 +106,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
*/
extern
"C"
__global__
void
computeFixedField
(
unsigned
long
long
*
__restrict__
fieldBuffers
,
unsigned
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
const
unsigned
int
*
__restrict__
exclusionIndices
,
const
unsigned
int
*
__restrict__
exclusionRowIndices
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
ushort2
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
unsigned
int
*
__restrict__
interactionFlags
,
...
...
@@ -185,11 +185,9 @@ extern "C" __global__ void computeFixedField(
localData
[
localAtomIndex
].
quadrupoleZZ
=
data
.
quadrupoleZZ
;
localData
[
localAtomIndex
].
thole
=
data
.
thole
;
// IS THIS CORRECT?
localData
[
localAtomIndex
].
damp
=
data
.
damp
;
// IS THIS CORRECT?
unsigned
int
excl
=
exclusions
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
uint2
covalent
=
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
unsigned
int
polarizationGroup
=
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
bool
isExcluded
=
!
(
excl
&
0x1
);
int
atom2
=
tbx
+
j
;
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
...
...
@@ -200,13 +198,13 @@ extern "C" __global__ void computeFixedField(
real3
field1
;
real3
field2
;
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
field1
,
field2
);
if
(
!
isExcluded
)
{
float
d
=
computeDScaleFactor
(
covalent
);
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
float
d
=
computeDScaleFactor
(
polarizationGroup
);
data
.
field
+=
d
*
field1
;
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
data
.
fieldPolar
+=
p
*
field1
;
}
excl
>>=
1
;
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
...
...
@@ -231,7 +229,6 @@ extern "C" __global__ void computeFixedField(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
bool
isExcluded
=
false
;
int
atom2
=
tbx
+
j
;
int
bufferIndex
=
3
*
threadIdx
.
x
;
real3
dEdR1
=
make_real3
(
0
);
...
...
@@ -298,16 +295,13 @@ extern "C" __global__ void computeFixedField(
{
// Compute the full set of interactions in this tile.
unsigned
int
excl
=
(
hasExclusions
?
exclusions
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0xFFFFFFFF
);
uint2
covalent
=
(
hasExclusions
?
covalentFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
make_uint2
(
0
,
0
));
unsigned
int
polarizationGroup
=
(
hasExclusions
?
polarizationGroupFlags
[
exclusionIndex
[
localGroupIndex
]
+
tgx
]
:
0
);
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TILE_SIZE
-
tgx
));
covalent
.
x
=
(
covalent
.
x
>>
tgx
)
|
(
covalent
.
x
<<
(
TILE_SIZE
-
tgx
));
covalent
.
y
=
(
covalent
.
y
>>
tgx
)
|
(
covalent
.
y
<<
(
TILE_SIZE
-
tgx
));
polarizationGroup
=
(
polarizationGroup
>>
tgx
)
|
(
polarizationGroup
<<
(
TILE_SIZE
-
tgx
));
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
bool
isExcluded
=
!
(
excl
&
0x1
);
int
atom2
=
tbx
+
tj
;
real3
delta
=
make_real3
(
localData
[
atom2
].
posq
.
x
-
data
.
posq
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
);
#ifdef USE_PERIODIC
...
...
@@ -318,15 +312,14 @@ extern "C" __global__ void computeFixedField(
real3
field1
;
real3
field2
;
computeOneInteraction
(
data
,
localData
[
atom2
],
delta
,
field1
,
field2
);
if
(
!
isExcluded
)
{
float
d
=
computeDScaleFactor
(
covalent
);
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
float
d
=
computeDScaleFactor
(
polarizationGroup
);
data
.
field
+=
d
*
field1
;
localData
[
atom2
].
field
+=
d
*
field2
;
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
);
data
.
fieldPolar
+=
p
*
field1
;
localData
[
atom2
].
fieldPolar
+=
p
*
field2
;
}
excl
>>=
1
;
covalent
.
x
>>=
1
;
covalent
.
y
>>=
1
;
polarizationGroup
>>=
1
;
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
View file @
776f6f22
...
...
@@ -8,10 +8,10 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// code common to ZThenX and Bisector
for
(
int
particleIndex
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
particleIndex
<
NUM_ATOMS
;
particleIndex
+=
gridDim
.
x
*
blockDim
.
x
)
{
int4
particles
=
multipoleParticles
[
particleIndex
];
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
gridDim
.
x
*
blockDim
.
x
)
{
int4
particles
=
multipoleParticles
[
atom
];
if
(
particles
.
x
>=
0
&&
particles
.
z
>=
0
)
{
real4
thisParticlePos
=
posq
[
particleIndex
];
real4
thisParticlePos
=
posq
[
atom
];
real4
posZ
=
posq
[
particles
.
z
];
real3
vectorZ
=
make_real3
(
posZ
.
x
-
thisParticlePos
.
x
,
posZ
.
y
-
thisParticlePos
.
y
,
posZ
.
z
-
thisParticlePos
.
z
);
real4
posX
=
posq
[
particles
.
x
];
...
...
@@ -149,7 +149,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Transform the dipole
unsigned
int
offset
=
3
*
particleIndex
;
unsigned
int
offset
=
3
*
atom
;
real
molDipole
[
3
];
molDipole
[
0
]
=
molecularDipoles
[
offset
];
molDipole
[
1
]
=
molecularDipoles
[
offset
+
1
];
...
...
@@ -164,55 +164,47 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Transform the quadrupole
real
mPole
[
3
][
3
];
offset
=
9
*
particleIndex
;
mPole
[
0
][
0
]
=
molecularQuadrupoles
[
offset
];
mPole
[
0
][
1
]
=
molecularQuadrupoles
[
offset
+
1
];
mPole
[
0
][
2
]
=
molecularQuadrupoles
[
offset
+
2
];
mPole
[
1
][
0
]
=
molecularQuadrupoles
[
offset
+
3
];
mPole
[
1
][
1
]
=
molecularQuadrupoles
[
offset
+
4
];
mPole
[
1
][
2
]
=
molecularQuadrupoles
[
offset
+
5
];
mPole
[
2
][
0
]
=
molecularQuadrupoles
[
offset
+
6
];
mPole
[
2
][
1
]
=
molecularQuadrupoles
[
offset
+
7
];
mPole
[
2
][
2
]
=
molecularQuadrupoles
[
offset
+
8
];
offset
=
5
*
atom
;
real
mPoleXX
=
molecularQuadrupoles
[
offset
];
real
mPoleXY
=
molecularQuadrupoles
[
offset
+
1
];
real
mPoleXZ
=
molecularQuadrupoles
[
offset
+
2
];
real
mPoleYY
=
molecularQuadrupoles
[
offset
+
3
];
real
mPoleYZ
=
molecularQuadrupoles
[
offset
+
4
];
real
mPoleZZ
=
1
-
mPoleXX
-
mPoleYY
;
if
(
reverse
)
{
mPole
[
0
][
1
]
*=
-
1
;
mPole
[
1
][
0
]
*=
-
1
;
mPole
[
1
][
2
]
*=
-
1
;
mPole
[
2
][
1
]
*=
-
1
;
mPoleXY
*=
-
1
;
mPoleYZ
*=
-
1
;
}
labFrameQuadrupoles
[
offset
+
8
]
=
vectorX
.
z
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
8
]
+=
vectorY
.
z
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
8
]
+=
vectorZ
.
z
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
y
*
mPole
[
0
][
0
]
+
vectorY
.
y
*
mPole
[
0
][
1
]
+
vectorZ
.
y
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
+=
vectorY
.
y
*
(
vectorX
.
y
*
mPole
[
1
][
0
]
+
vectorY
.
y
*
mPole
[
1
][
1
]
+
vectorZ
.
y
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
4
]
+=
vectorZ
.
y
*
(
vectorX
.
y
*
mPole
[
2
][
0
]
+
vectorY
.
y
*
mPole
[
2
][
1
]
+
vectorZ
.
y
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
+=
vectorY
.
y
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
5
]
+=
vectorZ
.
y
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
]
=
vectorX
.
x
*
(
vectorX
.
x
*
mPole
[
0
][
0
]
+
vectorY
.
x
*
mPole
[
0
][
1
]
+
vectorZ
.
x
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
]
+=
vectorY
.
x
*
(
vectorX
.
x
*
mPole
[
1
][
0
]
+
vectorY
.
x
*
mPole
[
1
][
1
]
+
vectorZ
.
x
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
]
+=
vectorZ
.
x
*
(
vectorX
.
x
*
mPole
[
2
][
0
]
+
vectorY
.
x
*
mPole
[
2
][
1
]
+
vectorZ
.
x
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
=
vectorX
.
x
*
(
vectorX
.
y
*
mPole
[
0
][
0
]
+
vectorY
.
y
*
mPole
[
0
][
1
]
+
vectorZ
.
y
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
+=
vectorY
.
x
*
(
vectorX
.
y
*
mPole
[
1
][
0
]
+
vectorY
.
y
*
mPole
[
1
][
1
]
+
vectorZ
.
y
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
1
]
+=
vectorZ
.
x
*
(
vectorX
.
y
*
mPole
[
2
][
0
]
+
vectorY
.
y
*
mPole
[
2
][
1
]
+
vectorZ
.
y
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
=
vectorX
.
x
*
(
vectorX
.
z
*
mPole
[
0
][
0
]
+
vectorY
.
z
*
mPole
[
0
][
1
]
+
vectorZ
.
z
*
mPole
[
0
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
+=
vectorY
.
x
*
(
vectorX
.
z
*
mPole
[
1
][
0
]
+
vectorY
.
z
*
mPole
[
1
][
1
]
+
vectorZ
.
z
*
mPole
[
1
][
2
]);
labFrameQuadrupoles
[
offset
+
2
]
+=
vectorZ
.
x
*
(
vectorX
.
z
*
mPole
[
2
][
0
]
+
vectorY
.
z
*
mPole
[
2
][
1
]
+
vectorZ
.
z
*
mPole
[
2
][
2
]);
labFrameQuadrupoles
[
offset
+
3
]
=
labFrameQuadrupoles
[
offset
+
1
];
labFrameQuadrupoles
[
offset
+
6
]
=
labFrameQuadrupoles
[
offset
+
2
];
labFrameQuadrupoles
[
offset
+
7
]
=
labFrameQuadrupoles
[
offset
+
5
];
labFrameQuadrupoles
[
offset
]
=
vectorX
.
x
*
(
vectorX
.
x
*
mPoleXX
+
vectorY
.
x
*
mPoleXY
+
vectorZ
.
x
*
mPoleXZ
);
+
vectorY
.
x
*
(
vectorX
.
x
*
mPoleXY
+
vectorY
.
x
*
mPoleYY
+
vectorZ
.
x
*
mPoleYZ
);
+
vectorZ
.
x
*
(
vectorX
.
x
*
mPoleXZ
+
vectorY
.
x
*
mPoleYZ
+
vectorZ
.
x
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
1
]
=
vectorX
.
x
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
);
+
vectorY
.
x
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
);
+
vectorZ
.
x
*
(
vectorX
.
y
*
mPoleXZ
+
vectorY
.
y
*
mPoleYZ
+
vectorZ
.
y
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
2
]
=
vectorX
.
x
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
);
+
vectorY
.
x
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
);
+
vectorZ
.
x
*
(
vectorX
.
z
*
mPoleXZ
+
vectorY
.
z
*
mPoleYZ
+
vectorZ
.
z
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
3
]
=
vectorX
.
y
*
(
vectorX
.
y
*
mPoleXX
+
vectorY
.
y
*
mPoleXY
+
vectorZ
.
y
*
mPoleXZ
);
+
vectorY
.
y
*
(
vectorX
.
y
*
mPoleXY
+
vectorY
.
y
*
mPoleYY
+
vectorZ
.
y
*
mPoleYZ
);
+
vectorZ
.
y
*
(
vectorX
.
y
*
mPoleXZ
+
vectorY
.
y
*
mPoleYZ
+
vectorZ
.
y
*
mPoleZZ
);
labFrameQuadrupoles
[
offset
+
4
]
=
vectorX
.
y
*
(
vectorX
.
z
*
mPoleXX
+
vectorY
.
z
*
mPoleXY
+
vectorZ
.
z
*
mPoleXZ
);
+
vectorY
.
y
*
(
vectorX
.
z
*
mPoleXY
+
vectorY
.
z
*
mPoleYY
+
vectorZ
.
z
*
mPoleYZ
);
+
vectorZ
.
y
*
(
vectorX
.
z
*
mPoleXZ
+
vectorY
.
z
*
mPoleYZ
+
vectorZ
.
z
*
mPoleZZ
);
}
}
}
extern
"C"
__global__
void
recordInducedDipoles
(
const
long
long
*
__restrict__
fieldBuffers
,
const
long
long
*
__restrict__
fieldPolarBuffers
,
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
)
{
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
gridDim
.
x
*
blockDim
.
x
)
{
real
scale
=
polarizability
[
atom
]
/
(
real
)
0xFFFFFFFF
;
inducedDipole
[
3
*
atom
]
=
scale
*
fieldBuffers
[
atom
];
inducedDipole
[
3
*
atom
+
1
]
=
scale
*
fieldBuffers
[
atom
+
PADDED_NUM_ATOMS
];
inducedDipole
[
3
*
atom
+
2
]
=
scale
*
fieldBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
];
inducedDipolePolar
[
3
*
atom
]
=
scale
*
fieldPolarBuffers
[
atom
];
inducedDipolePolar
[
3
*
atom
+
1
]
=
scale
*
fieldPolarBuffers
[
atom
+
PADDED_NUM_ATOMS
];
inducedDipolePolar
[
3
*
atom
+
2
]
=
scale
*
fieldPolarBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
];
}
}
\ No newline at end of file
plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaVdwForce.cpp
View file @
776f6f22
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors.
*
* Portions copyright (c) 2008
-2012
Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
...
...
@@ -42,6 +42,13 @@
#include "openmm/LangevinIntegrator.h"
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#define ASSERT_EQUAL_TOL_MOD(expected, found, tol, testname) {double _scale_ = std::abs(expected) > 1.0 ? std::abs(expected) : 1.0; if (!(std::abs((expected)-(found))/_scale_ <= (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
#define ASSERT_EQUAL_VEC_MOD(expected, found, tol,testname) {ASSERT_EQUAL_TOL_MOD((expected)[0], (found)[0], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[1], (found)[1], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[2], (found)[2], (tol),(testname));};
using
namespace
OpenMM
;
const
double
TOL
=
1e-4
;
...
...
@@ -56,22 +63,20 @@ void testVdw( FILE* log ) {
std
::
string
epsilonCombiningRule
=
std
::
string
(
"HHG"
);
amoebaVdwForce
->
setEpsilonCombiningRule
(
epsilonCombiningRule
);
int
classIndex
=
0
;
for
(
int
ii
=
0
;
ii
<
numberOfParticles
;
ii
++
){
int
indexIV
,
indexClass
;
int
indexIV
;
double
mass
,
sigma
,
epsilon
,
reduction
;
std
::
vector
<
int
>
exclusions
;
if
(
ii
==
0
||
ii
==
3
){
mass
=
16.0
;
indexIV
=
ii
;
indexClass
=
70
;
sigma
=
1.70250E+00
;
epsilon
=
1.10000E-01
;
reduction
=
0.0
;
}
else
{
mass
=
1.0
;
indexIV
=
ii
<
3
?
0
:
3
;
indexClass
=
71
;
sigma
=
1.32750E+00
;
epsilon
=
1.35000E-02
;
reduction
=
0.91
;
...
...
@@ -89,7 +94,7 @@ void testVdw( FILE* log ) {
exclusions
.
push_back
(
5
);
}
system
.
addParticle
(
mass
);
amoebaVdwForce
->
addParticle
(
indexIV
,
indexClass
,
sigma
,
epsilon
,
reduction
);
amoebaVdwForce
->
addParticle
(
indexIV
,
classIndex
,
sigma
,
epsilon
,
reduction
);
amoebaVdwForce
->
setParticleExclusions
(
ii
,
exclusions
);
}
LangevinIntegrator
integrator
(
0.0
,
0.1
,
0.01
);
...
...
@@ -130,12 +135,13 @@ void testVdw( FILE* log ) {
positions
[
ii
][
2
]
*=
AngstromToNm
;
}
for
(
int
ii
=
0
;
ii
<
amoebaVdwForce
->
getNumParticles
();
ii
++
){
int
indexIV
,
indexClass
;
int
indexIV
;
int
classIndex
;
double
sigma
,
epsilon
,
reduction
;
amoebaVdwForce
->
getParticleParameters
(
ii
,
indexIV
,
indexClass
,
sigma
,
epsilon
,
reduction
);
amoebaVdwForce
->
getParticleParameters
(
ii
,
indexIV
,
classIndex
,
sigma
,
epsilon
,
reduction
);
sigma
*=
AngstromToNm
;
epsilon
*=
CalToJoule
;
amoebaVdwForce
->
setParticleParameters
(
ii
,
indexIV
,
indexClass
,
sigma
,
epsilon
,
reduction
);
amoebaVdwForce
->
setParticleParameters
(
ii
,
indexIV
,
classIndex
,
sigma
,
epsilon
,
reduction
);
}
platformName
=
"CUDA"
;
Context
context
(
system
,
integrator
,
Platform
::
getPlatformByName
(
platformName
)
);
...
...
@@ -170,6 +176,342 @@ void testVdw( FILE* log ) {
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
tolerance
);
}
void
setupAndGetForcesEnergyVdwAmmonia
(
const
std
::
string
&
sigmaCombiningRule
,
const
std
::
string
&
epsilonCombiningRule
,
double
cutoff
,
double
boxDimension
,
std
::
vector
<
Vec3
>&
forces
,
double
&
energy
,
FILE
*
log
){
// beginning of Vdw setup
System
system
;
AmoebaVdwForce
*
amoebaVdwForce
=
new
AmoebaVdwForce
();;
int
numberOfParticles
=
8
;
amoebaVdwForce
->
setSigmaCombiningRule
(
sigmaCombiningRule
);
amoebaVdwForce
->
setEpsilonCombiningRule
(
epsilonCombiningRule
);
amoebaVdwForce
->
setUseNeighborList
(
1
);
amoebaVdwForce
->
setCutoff
(
cutoff
);
if
(
boxDimension
>
0.0
){
Vec3
a
(
boxDimension
,
0.0
,
0.0
);
Vec3
b
(
0.0
,
boxDimension
,
0.0
);
Vec3
c
(
0.0
,
0.0
,
boxDimension
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
amoebaVdwForce
->
setPBC
(
1
);
}
else
{
amoebaVdwForce
->
setPBC
(
0
);
}
// addParticle: ivIndex, radius, epsilon, reductionFactor
int
classIndex
=
0
;
system
.
addParticle
(
1.4007000e+01
);
amoebaVdwForce
->
addParticle
(
0
,
classIndex
,
1.8550000e-01
,
4.3932000e-01
,
0.0000000e+00
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
0
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
0
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
0
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
system
.
addParticle
(
1.4007000e+01
);
amoebaVdwForce
->
addParticle
(
4
,
classIndex
,
1.8550000e-01
,
4.3932000e-01
,
0.0000000e+00
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
4
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
4
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
system
.
addParticle
(
1.0080000e+00
);
amoebaVdwForce
->
addParticle
(
4
,
classIndex
,
1.3500000e-01
,
8.3680000e-02
,
9.1000000e-01
);
// ParticleExclusions
std
::
vector
<
int
>
exclusions
;
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
1
);
exclusions
.
push_back
(
2
);
exclusions
.
push_back
(
3
);
amoebaVdwForce
->
setParticleExclusions
(
0
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
1
);
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
2
);
exclusions
.
push_back
(
3
);
amoebaVdwForce
->
setParticleExclusions
(
1
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
2
);
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
1
);
exclusions
.
push_back
(
3
);
amoebaVdwForce
->
setParticleExclusions
(
2
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
3
);
exclusions
.
push_back
(
0
);
exclusions
.
push_back
(
1
);
exclusions
.
push_back
(
2
);
amoebaVdwForce
->
setParticleExclusions
(
3
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
4
);
exclusions
.
push_back
(
5
);
exclusions
.
push_back
(
6
);
exclusions
.
push_back
(
7
);
amoebaVdwForce
->
setParticleExclusions
(
4
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
5
);
exclusions
.
push_back
(
4
);
exclusions
.
push_back
(
6
);
exclusions
.
push_back
(
7
);
amoebaVdwForce
->
setParticleExclusions
(
5
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
6
);
exclusions
.
push_back
(
4
);
exclusions
.
push_back
(
5
);
exclusions
.
push_back
(
7
);
amoebaVdwForce
->
setParticleExclusions
(
6
,
exclusions
);
exclusions
.
resize
(
0
);
exclusions
.
push_back
(
7
);
exclusions
.
push_back
(
4
);
exclusions
.
push_back
(
5
);
exclusions
.
push_back
(
6
);
amoebaVdwForce
->
setParticleExclusions
(
7
,
exclusions
);
// end of Vdw setup
std
::
vector
<
Vec3
>
positions
(
numberOfParticles
);
positions
[
0
]
=
Vec3
(
1.5927280e-01
,
1.7000000e-06
,
1.6491000e-03
);
positions
[
1
]
=
Vec3
(
2.0805540e-01
,
-
8.1258800e-02
,
3.7282500e-02
);
positions
[
2
]
=
Vec3
(
2.0843610e-01
,
8.0953200e-02
,
3.7462200e-02
);
positions
[
3
]
=
Vec3
(
1.7280780e-01
,
2.0730000e-04
,
-
9.8741700e-02
);
positions
[
4
]
=
Vec3
(
-
1.6743680e-01
,
1.5900000e-05
,
-
6.6149000e-03
);
positions
[
5
]
=
Vec3
(
-
2.0428260e-01
,
8.1071500e-02
,
4.1343900e-02
);
positions
[
6
]
=
Vec3
(
-
6.7308300e-02
,
1.2800000e-05
,
1.0623300e-02
);
positions
[
7
]
=
Vec3
(
-
2.0426290e-01
,
-
8.1231400e-02
,
4.1033500e-02
);
system
.
addForce
(
amoebaVdwForce
);
std
::
string
platformName
;
platformName
=
"CUDA"
;
LangevinIntegrator
integrator
(
0.0
,
0.1
,
0.01
);
Context
context
(
system
,
integrator
,
Platform
::
getPlatformByName
(
platformName
)
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
energy
=
state
.
getPotentialEnergy
();
}
void
compareForcesEnergy
(
std
::
string
&
testName
,
double
expectedEnergy
,
double
energy
,
std
::
vector
<
Vec3
>&
expectedForces
,
std
::
vector
<
Vec3
>&
forces
,
double
tolerance
,
FILE
*
log
)
{
#ifdef AMOEBA_DEBUG
if
(
log
){
(
void
)
fprintf
(
log
,
"%s: expected energy=%14.7e %14.7e
\n
"
,
testName
.
c_str
(),
expectedEnergy
,
state
.
getPotentialEnergy
()
);
for
(
unsigned
int
ii
=
0
;
ii
<
forces
.
size
();
ii
++
){
(
void
)
fprintf
(
log
,
"%6u [%14.7e %14.7e %14.7e] [%14.7e %14.7e %14.7e]
\n
"
,
ii
,
expectedForces
[
ii
][
0
],
expectedForces
[
ii
][
1
],
expectedForces
[
ii
][
2
],
forces
[
ii
][
0
],
forces
[
ii
][
1
],
forces
[
ii
][
2
]
);
}
(
void
)
fflush
(
log
);
}
#endif
for
(
unsigned
int
ii
=
0
;
ii
<
forces
.
size
();
ii
++
){
ASSERT_EQUAL_VEC_MOD
(
expectedForces
[
ii
],
forces
[
ii
],
tolerance
,
testName
);
}
ASSERT_EQUAL_TOL_MOD
(
expectedEnergy
,
energy
,
tolerance
,
testName
);
}
// test VDW w/ sigmaRule=CubicMean and epsilonRule=HHG
void
testVdwAmmoniaCubicMeanHhg
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwAmmoniaCubicMeanHhg"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
-
1.0
;
double
cutoff
=
9000000.0
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"CUBIC-MEAN"
,
"HHG"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
4.8012258e+00
;
expectedForces
[
0
]
=
Vec3
(
2.9265247e+02
,
-
1.4507808e-02
,
-
6.9562123e+00
);
expectedForces
[
1
]
=
Vec3
(
-
2.2451693e+00
,
4.8143073e-01
,
-
2.0041494e-01
);
expectedForces
[
2
]
=
Vec3
(
-
2.2440698e+00
,
-
4.7905450e-01
,
-
2.0125284e-01
);
expectedForces
[
3
]
=
Vec3
(
-
1.0840394e+00
,
-
5.8531253e-04
,
2.6934135e-01
);
expectedForces
[
4
]
=
Vec3
(
-
5.6305662e+01
,
1.4733908e-03
,
-
1.8083306e-01
);
expectedForces
[
5
]
=
Vec3
(
1.6750145e+00
,
-
3.2448374e-01
,
-
1.8030914e-01
);
expectedForces
[
6
]
=
Vec3
(
-
2.3412420e+02
,
1.0754069e-02
,
7.6287492e+00
);
expectedForces
[
7
]
=
Vec3
(
1.6756544e+00
,
3.2497316e-01
,
-
1.7906832e-01
);
double
tolerance
=
1.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
// test VDW w/ sigmaRule=Arithmetic and epsilonRule=Arithmetic
void
testVdwAmmoniaArithmeticArithmetic
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwAmmoniaArithmeticArithmetic"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
-
1.0
;
double
cutoff
=
9000000.0
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"ARITHMETIC"
,
"ARITHMETIC"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
4.2252403e+00
;
expectedForces
[
0
]
=
Vec3
(
3.0603839e+02
,
-
1.5550310e-02
,
-
7.2661707e+00
);
expectedForces
[
1
]
=
Vec3
(
-
2.7801357e+00
,
5.8805051e-01
,
-
2.5907269e-01
);
expectedForces
[
2
]
=
Vec3
(
-
2.7753968e+00
,
-
5.8440732e-01
,
-
2.5969111e-01
);
expectedForces
[
3
]
=
Vec3
(
-
2.2496416e+00
,
-
1.1797440e-03
,
5.5501757e-01
);
expectedForces
[
4
]
=
Vec3
(
-
5.5077629e+01
,
8.3417114e-04
,
-
3.3668921e-01
);
expectedForces
[
5
]
=
Vec3
(
2.3752452e+00
,
-
4.6788669e-01
,
-
2.4907764e-01
);
expectedForces
[
6
]
=
Vec3
(
-
2.4790697e+02
,
1.1419770e-02
,
8.0629999e+00
);
expectedForces
[
7
]
=
Vec3
(
2.3761408e+00
,
4.6871961e-01
,
-
2.4731607e-01
);
double
tolerance
=
1.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
// test VDW w/ sigmaRule=Geometric and epsilonRule=Geometric
void
testVdwAmmoniaGeometricGeometric
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwAmmoniaGeometricGeometric"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
-
1.0
;
double
cutoff
=
9000000.0
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"GEOMETRIC"
,
"GEOMETRIC"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
2.5249914e+00
;
expectedForces
[
0
]
=
Vec3
(
2.1169631e+02
,
-
1.0710925e-02
,
-
4.3728025e+00
);
expectedForces
[
1
]
=
Vec3
(
-
2.2585621e+00
,
4.8409995e-01
,
-
2.0188344e-01
);
expectedForces
[
2
]
=
Vec3
(
-
2.2551351e+00
,
-
4.8124855e-01
,
-
2.0246986e-01
);
expectedForces
[
3
]
=
Vec3
(
-
1.7178028e+00
,
-
9.0851787e-04
,
4.2466975e-01
);
expectedForces
[
4
]
=
Vec3
(
-
4.8302147e+01
,
9.6603376e-04
,
-
5.7972068e-01
);
expectedForces
[
5
]
=
Vec3
(
1.8100634e+00
,
-
3.5214093e-01
,
-
1.9357207e-01
);
expectedForces
[
6
]
=
Vec3
(
-
1.6078365e+02
,
7.2117601e-03
,
5.3180261e+00
);
expectedForces
[
7
]
=
Vec3
(
1.8109211e+00
,
3.5273117e-01
,
-
1.9224723e-01
);
double
tolerance
=
1.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
void
testVdwAmmoniaCubicMeanHarmonic
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwAmmoniaCubicMeanHarmonic"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
-
1.0
;
double
cutoff
=
9000000.0
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"CUBIC-MEAN"
,
"HARMONIC"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
4.1369069e+00
;
expectedForces
[
0
]
=
Vec3
(
2.5854436e+02
,
-
1.2779529e-02
,
-
5.9041148e+00
);
expectedForces
[
1
]
=
Vec3
(
-
2.0832419e+00
,
4.4915831e-01
,
-
1.8266000e-01
);
expectedForces
[
2
]
=
Vec3
(
-
2.0823991e+00
,
-
4.4699804e-01
,
-
1.8347141e-01
);
expectedForces
[
3
]
=
Vec3
(
-
9.5914714e-01
,
-
5.2162026e-04
,
2.3873165e-01
);
expectedForces
[
4
]
=
Vec3
(
-
5.3724787e+01
,
1.4838241e-03
,
-
2.8089191e-01
);
expectedForces
[
5
]
=
Vec3
(
1.5074325e+00
,
-
2.9016397e-01
,
-
1.6385118e-01
);
expectedForces
[
6
]
=
Vec3
(
-
2.0271029e+02
,
9.2367947e-03
,
6.6389988e+00
);
expectedForces
[
7
]
=
Vec3
(
1.5080748e+00
,
2.9058422e-01
,
-
1.6274118e-01
);
double
tolerance
=
1.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
// test w/ cutoff=0.25 nm; single ixn between two particles (0 and 6); force nonzero on
// particle 4 due to reduction applied to NH
// the distance between 0 and 6 is ~ 0.235 so the ixn is in the tapered region
void
testVdwTaper
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwTaper"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
-
1.0
;
double
cutoff
=
0.25
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"CUBIC-MEAN"
,
"HHG"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
3.5478444e+00
;
expectedForces
[
0
]
=
Vec3
(
5.6710779e+02
,
-
2.7391004e-02
,
-
1.7867730e+01
);
expectedForces
[
1
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
expectedForces
[
2
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
expectedForces
[
3
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
expectedForces
[
4
]
=
Vec3
(
-
5.1039701e+01
,
2.4651903e-03
,
1.6080957e+00
);
expectedForces
[
5
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
expectedForces
[
6
]
=
Vec3
(
-
5.1606809e+02
,
2.4925813e-02
,
1.6259634e+01
);
expectedForces
[
7
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
double
tolerance
=
1.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
// test PBC
void
testVdwPBC
(
FILE
*
log
)
{
std
::
string
testName
=
"testVdwPBC"
;
int
numberOfParticles
=
8
;
double
boxDimension
=
0.6
;
double
cutoff
=
0.25
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
setupAndGetForcesEnergyVdwAmmonia
(
"CUBIC-MEAN"
,
"HHG"
,
cutoff
,
boxDimension
,
forces
,
energy
,
log
);
std
::
vector
<
Vec3
>
expectedForces
(
numberOfParticles
);
double
expectedEnergy
=
8.4385405e+00
;
expectedForces
[
0
]
=
Vec3
(
5.1453069e+02
,
4.9751912e-01
,
-
1.2759570e+01
);
expectedForces
[
1
]
=
Vec3
(
-
2.5622586e+02
,
-
4.6524265e+01
,
2.4281465e+01
);
expectedForces
[
2
]
=
Vec3
(
-
2.7538705e+02
,
5.1831690e+01
,
2.7367710e+01
);
expectedForces
[
3
]
=
Vec3
(
-
0.0000000e+00
,
-
0.0000000e+00
,
-
0.0000000e+00
);
expectedForces
[
4
]
=
Vec3
(
3.0883034e+02
,
-
5.8876974e+00
,
-
5.8286122e+01
);
expectedForces
[
5
]
=
Vec3
(
1.1319359e+02
,
-
3.2047069e-01
,
1.6181231e+00
);
expectedForces
[
6
]
=
Vec3
(
-
5.1606809e+02
,
2.4925813e-02
,
1.6259634e+01
);
expectedForces
[
7
]
=
Vec3
(
1.1112638e+02
,
3.7829857e-01
,
1.5187587e+00
);
// tolerance is higher here due to interpolation used in setting tapering coefficients;
// if tapering turned off, then absolute difference < 2.0e-05
double
tolerance
=
5.0e-04
;
compareForcesEnergy
(
testName
,
expectedEnergy
,
energy
,
expectedForces
,
forces
,
tolerance
,
log
);
}
int
main
(
int
numberOfArguments
,
char
*
argv
[]
)
{
...
...
@@ -178,7 +520,37 @@ int main( int numberOfArguments, char* argv[] ) {
registerAmoebaCudaKernelFactories
();
FILE
*
log
=
NULL
;
testVdw
(
log
);
// tests using two ammonia molecules
// test VDW w/ sigmaRule=CubicMean and epsilonRule=HHG
testVdwAmmoniaCubicMeanHhg
(
log
);
// test VDW w/ sigmaRule=Arithmetic and epsilonRule=Arithmetic
testVdwAmmoniaArithmeticArithmetic
(
log
);
// test VDW w/ sigmaRule=Geometric and epsilonRule=Geometric
testVdwAmmoniaGeometricGeometric
(
log
);
// test VDW w/ sigmaRule=CubicMean and epsilonRule=Harmonic
testVdwAmmoniaCubicMeanHarmonic
(
log
);
// test w/ cutoff=0.25 nm; single ixn between two particles (0 and 6); force nonzero on
// particle 4 due to reduction applied to NH
// the distance between 0 and 6 is ~ 0.235 so the ixn is in the tapered region
testVdwTaper
(
log
);
// test PBC
testVdwPBC
(
log
);
}
catch
(
const
std
::
exception
&
e
)
{
std
::
cout
<<
"exception: "
<<
e
.
what
()
<<
std
::
endl
;
std
::
cout
<<
"FAIL - ERROR. Test failed."
<<
std
::
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment