Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
4a1e9683
Commit
4a1e9683
authored
Jun 28, 2011
by
Mark Friedrichs
Browse files
Optimzations and bug fix for mapping of torque to force for small molecules
parent
44bfd168
Changes
32
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
1457 additions
and
1197 deletions
+1457
-1197
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
...c/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+22
-25
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
...uda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
+2
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+280
-842
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+89
-301
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
+182
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
...c/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
+466
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
+170
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
...rc/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
+181
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+48
-26
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
+8
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
...atforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
...platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
+8
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
View file @
4a1e9683
...
@@ -688,8 +688,9 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -688,8 +688,9 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
%s
Initial setup for matrix multiply
\n
"
,
methodName
);
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
cudaComputeAmoebaMutualInducedAndGkFieldBySOR
Initial setup for matrix multiply
\n
"
);
fflush
(
amoebaGpu
->
log
);
gpu
->
psPosq4
->
Download
();
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_Field
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
amoebaGpu
->
psE_FieldPolar
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
(),
amoebaGpu
->
psInducedDipole
->
Download
(),
...
@@ -708,9 +709,10 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -708,9 +709,10 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PolX!!! %12.4e %12.4e "
,
amoebaGpu
->
psPolarizability
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psPolarizability
->
_pSysData
[
offset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PolX!!! %12.4e %12.4e "
,
amoebaGpu
->
psPolarizability
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psPolarizability
->
_pSysData
[
offset
+
2
]
);
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
" E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e %14.6e]
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
" E[%14.6e %14.6e %14.6e] Mi[%14.6e %14.6e
%14.6e] Pos[%14.6e %14.6e
%14.6e]
\n
"
,
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
],
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
+
2
],
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
],
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psE_Field
->
_pSysData
[
offset
+
2
],
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
+
2
]
);
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
],
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psInducedDipole
->
_pSysData
[
offset
+
2
],
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Ep[%14.6e %14.6e %14.6e] Mip[%14.6e %14.6e %14.6e]
\n
"
,
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
+
2
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psE_FieldPolar
->
_pSysData
[
offset
+
2
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
+
2
]
);
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
+
1
],
amoebaGpu
->
psInducedDipolePolar
->
_pSysData
[
offset
+
2
]
);
...
@@ -777,38 +779,33 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -777,38 +779,33 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFieldDelta_kernel"
);
LAUNCHERROR
(
"kReduceMutualInducedAndGkFieldDelta_kernel"
);
#if 0
// get total epsilon -- performing sums on cpu
{
float sum[4];
float currentEpsilon = -1.0e30;
for( int ii = 0; ii < 4; ii++ ){
sum[ii] = cudaGetSum( 3*gpu->natoms, amoebaGpu->psWorkVector[ii]);
sum[ii] = 48.033324f*sqrtf( sum[ii]/( (float) gpu->natoms) );
if( sum[ii] > currentEpsilon ){
currentEpsilon = sum[ii];
}
}
amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon;
(void) fprintf( amoebaGpu->log, "%s iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d sums=%14.6e %14.6e %14.6e %14.6e\n",
methodName, iteration, amoebaGpu->mutualInducedCurrentEpsilon,
amoebaGpu->psCurrentEpsilon->_pSysData[1],
amoebaGpu->psCurrentEpsilon->_pSysData[2], done, sum[0], sum[1], sum[2], sum[3] );
}
#endif
// Debye=48.033324f
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
amoebaGpu
->
psCurrentEpsilon
->
Download
();
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
float
currentEpsilon
=
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
0
];
amoebaGpu
->
mutualInducedCurrentEpsilon
=
currentEpsilon
;
amoebaGpu
->
mutualInducedCurrentEpsilon
=
currentEpsilon
;
// check for nans
if
(
currentEpsilon
!=
currentEpsilon
){
(
void
)
fprintf
(
stderr
,
"cudaComputeAmoebaMutualInducedAndGkFieldBySOR at timestep=%d iteration=%3d eps is nan -- exiting.
\n
"
,
timestep
,
iteration
);
(
void
)
fflush
(
NULL
);
exit
(
-
1
);
}
// converged?
if
(
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
||
amoebaGpu
->
mutualInducedCurrentEpsilon
<
amoebaGpu
->
mutualInducedTargetEpsilon
){
if
(
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
||
amoebaGpu
->
mutualInducedCurrentEpsilon
<
amoebaGpu
->
mutualInducedTargetEpsilon
){
done
=
1
;
done
=
1
;
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"cudaComputeAmoebaMutualInducedAndGkFieldBySOR step=%d iteration=%3d eps %14.6e done=%d
\n
"
,
timestep
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
done
);
}
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
...
@@ -869,11 +866,11 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
...
@@ -869,11 +866,11 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
if
(
0
&&
amoebaGpu
->
log
){
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
}
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedField.cu
View file @
4a1e9683
...
@@ -558,9 +558,11 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
...
@@ -558,9 +558,11 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReduceMutualInducedFieldDelta"
);
LAUNCHERROR
(
"kReduceMutualInducedFieldDelta"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
}
#endif
// Debye=48.033324f
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
amoebaGpu
->
psCurrentEpsilon
->
Download
();
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
4a1e9683
...
@@ -54,15 +54,11 @@ struct PmeDirectElectrostaticParticle {
...
@@ -54,15 +54,11 @@ struct PmeDirectElectrostaticParticle {
float
labFrameDipole
[
3
];
float
labFrameDipole
[
3
];
// lab frame quadrupole
// lab frame quadrupole
float
labFrameQuadrupole
[
5
];
float
labFrameQuadrupole
[
6
];
// induced dipole
// induced dipole
float
inducedDipole
[
3
];
float
inducedDipole
[
3
];
// polar induced dipole
float
inducedDipoleP
[
3
];
float
inducedDipoleP
[
3
];
// scaling factors
// scaling factors
...
@@ -72,7 +68,7 @@ struct PmeDirectElectrostaticParticle {
...
@@ -72,7 +68,7 @@ struct PmeDirectElectrostaticParticle {
float
force
[
3
];
float
force
[
3
];
float
torque
[
3
];
float
torque
[
3
];
//
float padding;
float
padding
;
#ifndef CALCULATE_FULL_TILE
#ifndef CALCULATE_FULL_TILE
float
tempForce
[
3
];
float
tempForce
[
3
];
...
@@ -117,816 +113,60 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
...
@@ -117,816 +113,60 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
// self-energy for PME
// self-energy for PME
__device__
static
void
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
float
*
energy
)
__device__
static
void
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
float
*
totalEnergy
)
{
float
term
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
fterm
=
-
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
cii
=
atomI
.
q
*
atomI
.
q
;
float
dii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
labFrameDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
labFrameDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
labFrameDipole
[
2
];
float
qii
=
atomI
.
labFrameQuadrupole
[
0
]
*
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]
*
atomI
.
labFrameQuadrupole
[
3
]
+
atomI
.
labFrameQuadrupole
[
5
]
*
atomI
.
labFrameQuadrupole
[
5
]
+
2.0
f
*
(
atomI
.
labFrameQuadrupole
[
1
]
*
atomI
.
labFrameQuadrupole
[
1
]
+
atomI
.
labFrameQuadrupole
[
2
]
*
atomI
.
labFrameQuadrupole
[
2
]
+
atomI
.
labFrameQuadrupole
[
4
]
*
atomI
.
labFrameQuadrupole
[
4
]);
float
uii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
inducedDipole
[
2
];
*
energy
=
(
cii
+
term
*
(
dii
/
3.0
f
+
2.0
f
*
term
*
qii
/
5.0
f
));
*
energy
+=
term
*
uii
/
3.0
f
;
*
energy
*=
fterm
;
}
// self-torque for PME
__device__
static
void
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
)
{
{
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
term
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
fterm
=
-
cSim
.
alphaEwald
/
cAmoebaSim
.
sqrtPi
;
float
uix
=
0.5
f
*
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
]);
float
uiy
=
0.5
f
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
float
uiz
=
0.5
f
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
]);
atomI
.
torque
[
0
]
+=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
float
cii
=
atomI
.
q
*
atomI
.
q
;
atomI
.
torque
[
1
]
+=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
+=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
}
float
dii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
labFrameDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
labFrameDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
labFrameDipole
[
2
];
/*
/*
#undef SUB_METHOD_NAME
float qii = atomI.labFrameQuadrupole[0]*atomI.labFrameQuadrupole[0] +
#undef F1
atomI.labFrameQuadrupole[3]*atomI.labFrameQuadrupole[3] +
#define SUB_METHOD_NAME(a, b) a##F1##b
atomI.labFrameQuadrupole[5]*atomI.labFrameQuadrupole[5] + 2.0f*(
#define F1
atomI.labFrameQuadrupole[1]*atomI.labFrameQuadrupole[1] +
//#define T1
atomI.labFrameQuadrupole[2]*atomI.labFrameQuadrupole[2] +
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
atomI.labFrameQuadrupole[4]*atomI.labFrameQuadrupole[4]);
#undef F1
//#undef T1
#undef SUB_METHOD_NAME
#define SUB_METHOD_NAME(a, b) a##F2##b
#define F2
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
#undef F2
#undef SUB_METHOD_NAME
#undef SUB_METHOD_NAME
#undef T1
#define SUB_METHOD_NAME(a, b) a##T1##b
#define T1
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
#undef T1
#undef SUB_METHOD_NAME
#define SUB_METHOD_NAME(a, b) a##T2##b
#define T2
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
#undef T2
#undef SUB_METHOD_NAME
#undef SUB_METHOD_NAME
#undef T3
#define SUB_METHOD_NAME(a, b) a##T3##b
#define T3
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
#undef T3
#undef SUB_METHOD_NAME
#define SUB_METHOD_NAME(a, b) a##T4##b
#define T4
#include "kCalculateAmoebaCudaPmeDirectElectrostatic_b.h"
#undef T4
#undef SUB_METHOD_NAME
*/
*/
static
__device__
void
calculatePmeDirectElectrostaticPairIxnF1_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
float
qii
=
atomI
.
labFrameQuadrupole
[
0
]
*
atomI
.
labFrameQuadrupole
[
0
]
+
const
float4
delta
,
const
float4
bn
,
const
float
bn5
,
const
float
*
scalingFactors
,
float4
*
forceTorqueEnergy
){
atomI
.
labFrameQuadrupole
[
3
]
*
atomI
.
labFrameQuadrupole
[
3
]
+
atomI
.
labFrameQuadrupole
[
0
]
*
atomI
.
labFrameQuadrupole
[
3
]
+
float
xr
=
delta
.
x
;
atomI
.
labFrameQuadrupole
[
1
]
*
atomI
.
labFrameQuadrupole
[
1
]
+
float
yr
=
delta
.
y
;
atomI
.
labFrameQuadrupole
[
2
]
*
atomI
.
labFrameQuadrupole
[
2
]
+
float
zr
=
delta
.
z
;
atomI
.
labFrameQuadrupole
[
4
]
*
atomI
.
labFrameQuadrupole
[
4
];
float
r
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
5
];
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
5
];
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
float
rr1
=
1.0
f
/
r
;
float
rr2
=
rr1
*
rr1
;
float
rr3
=
rr1
*
rr2
;
float
rr5
=
3.0
f
*
rr3
*
rr2
;
float
rr7
=
5.0
f
*
rr5
*
rr2
;
float
rr9
=
7.0
f
*
rr7
*
rr2
;
float
rr11
=
9.0
f
*
rr9
*
rr2
;
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
float
offset
=
1.0
f
-
scalingFactors
[
MScaleIndex
];
float
gf4
=
2.0
f
*
(
bn2
-
offset
*
rr5
);
float
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
float
qkdi1
=
qk1
*
di1
+
qk2
*
di2
+
qk3
*
di3
;
float
ftm21
=
gf4
*
(
qkdi1
-
qidk1
);
float
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
float
qkdi2
=
qk2
*
di1
+
qk5
*
di2
+
qk6
*
di3
;
float
ftm22
=
gf4
*
(
qkdi2
-
qidk2
);
float
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
float
qkdi3
=
qk3
*
di1
+
qk6
*
di2
+
qk9
*
di3
;
float
ftm23
=
gf4
*
(
qkdi3
-
qidk3
);
float
gf7
=
4.0
f
*
(
bn3
-
offset
*
rr7
);
float
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
float
qkqir1
=
qk1
*
qir1
+
qk2
*
qir2
+
qk3
*
qir3
;
ftm21
+=
gf7
*
(
qiqkr1
+
qkqir1
);
float
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
float
qkqir2
=
qk2
*
qir1
+
qk5
*
qir2
+
qk6
*
qir3
;
ftm22
+=
gf7
*
(
qiqkr2
+
qkqir2
);
float
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
float
qkqir3
=
qk3
*
qir1
+
qk6
*
qir2
+
qk9
*
qir3
;
ftm23
+=
gf7
*
(
qiqkr3
+
qkqir3
);
// calculate the scalar products for permanent components
float
sc2
=
di1
*
dk1
+
di2
*
dk2
+
di3
*
dk3
;
float
sc7
=
qir1
*
dk1
+
qir2
*
dk2
+
qir3
*
dk3
;
float
sc8
=
qkr1
*
di1
+
qkr2
*
di2
+
qkr3
*
di3
;
float
sc9
=
qir1
*
qkr1
+
qir2
*
qkr2
+
qir3
*
qkr3
;
float
sc10
=
qi1
*
qk1
+
qi2
*
qk2
+
qi3
*
qk3
+
qi2
*
qk2
+
qi5
*
qk5
+
qi6
*
qk6
+
qi3
*
qk3
+
qi6
*
qk6
+
qi9
*
qk9
;
float
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
float
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
// calculate the scalar products for induced components
// calculate the gl functions for permanent components
float
gl0
=
ci
*
ck
;
float
gl1
=
ck
*
sc3
-
ci
*
sc4
;
float
gl2
=
ci
*
sc6
+
ck
*
sc5
-
sc3
*
sc4
;
float
gl3
=
sc3
*
sc6
-
sc4
*
sc5
;
float
gl4
=
sc5
*
sc6
;
float
gl5
=
-
4.0
f
*
sc9
;
float
gl6
=
sc2
;
float
gl7
=
2.0
f
*
(
sc7
-
sc8
);
float
gl8
=
2.0
f
*
sc10
;
forceTorqueEnergy
->
w
+=
-
offset
*
rr1
*
gl0
+
(
bn1
-
offset
*
rr3
)
*
(
gl1
+
gl6
)
+
(
bn2
-
offset
*
rr5
)
*
(
gl2
+
gl7
+
gl8
)
+
(
bn3
-
offset
*
rr7
)
*
(
gl3
+
gl5
)
+
(
bn4
-
offset
*
rr9
)
*
gl4
;
// calculate the gl functions for induced components
// intermediate variables for permanent force terms
float
gf1
=
bn1
*
gl0
+
bn2
*
(
gl1
+
gl6
)
+
bn3
*
(
gl2
+
gl7
+
gl8
)
+
bn4
*
(
gl3
+
gl5
)
+
bn5
*
gl4
;
gf1
-=
offset
*
(
rr3
*
gl0
+
rr5
*
(
gl1
+
gl6
)
+
rr7
*
(
gl2
+
gl7
+
gl8
)
+
rr9
*
(
gl3
+
gl5
)
+
rr11
*
gl4
);
ftm21
+=
gf1
*
xr
;
ftm22
+=
gf1
*
yr
;
ftm23
+=
gf1
*
zr
;
float
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
-
offset
*
(
-
ck
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
);
ftm21
+=
gf2
*
di1
;
ftm22
+=
gf2
*
di2
;
ftm23
+=
gf2
*
di3
;
float
gf3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
-
offset
*
(
ci
*
rr3
+
sc3
*
rr5
+
sc5
*
rr7
);
ftm21
+=
gf3
*
dk1
;
ftm22
+=
gf3
*
dk2
;
ftm23
+=
gf3
*
dk3
;
float
gf5
=
2.0
f
*
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
-
offset
*
(
-
ck
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
));
ftm21
+=
gf5
*
qir1
;
ftm22
+=
gf5
*
qir2
;
ftm23
+=
gf5
*
qir3
;
float
gf6
=
2.0
f
*
(
-
ci
*
bn2
-
sc3
*
bn3
-
sc5
*
bn4
-
offset
*
(
-
ci
*
rr5
-
sc3
*
rr7
-
sc5
*
rr9
));
ftm21
+=
gf6
*
qkr1
;
ftm22
+=
gf6
*
qkr2
;
ftm23
+=
gf6
*
qkr3
;
forceTorqueEnergy
->
x
=
ftm21
;
forceTorqueEnergy
->
y
=
ftm22
;
forceTorqueEnergy
->
z
=
ftm23
;
return
;
}
static
__device__
void
calculatePmeDirectElectrostaticPairIxnF2_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float4
delta
,
const
float4
bn
,
const
float
*
scalingFactors
,
float4
*
forceTorqueEnergy
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
float
r
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
5
];
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
5
];
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
float
rr1
=
1.0
f
/
r
;
float
rr2
=
rr1
*
rr1
;
float
rr3
=
rr1
*
rr2
;
float
rr5
=
3.0
f
*
rr3
*
rr2
;
float
rr7
=
5.0
f
*
rr5
*
rr2
;
float
rr9
=
7.0
f
*
rr7
*
rr2
;
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
ratio
=
r
/
damp
;
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
if
(
damp
>
-
50.0
f
){
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
(
1.0
f
-
damp
)
*
expdamp
;
scale7
=
1.0
f
-
(
1.0
f
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
;
}
else
{
damp
=
0.0
f
;
}
}
float
psc5
=
1.0
f
-
scale5
*
scalingFactors
[
PScaleIndex
];
float
dsc5
=
1.0
f
-
scale5
*
scalingFactors
[
DScaleIndex
];
float
qiuk1
=
qi1
*
atomJ
.
inducedDipole
[
0
]
+
qi2
*
atomJ
.
inducedDipole
[
1
]
+
qi3
*
atomJ
.
inducedDipole
[
2
];
float
qkui1
=
qk1
*
atomI
.
inducedDipole
[
0
]
+
qk2
*
atomI
.
inducedDipole
[
1
]
+
qk3
*
atomI
.
inducedDipole
[
2
];
float
qiukp1
=
qi1
*
atomJ
.
inducedDipoleP
[
0
]
+
qi2
*
atomJ
.
inducedDipoleP
[
1
]
+
qi3
*
atomJ
.
inducedDipoleP
[
2
];
float
qkuip1
=
qk1
*
atomI
.
inducedDipoleP
[
0
]
+
qk2
*
atomI
.
inducedDipoleP
[
1
]
+
qk3
*
atomI
.
inducedDipoleP
[
2
];
forceTorqueEnergy
->
x
+=
bn2
*
(
qkui1
+
qkuip1
-
qiuk1
-
qiukp1
);
forceTorqueEnergy
->
x
-=
rr5
*
((
qkui1
-
qiuk1
)
*
psc5
+
(
qkuip1
-
qiukp1
)
*
dsc5
);
float
qiuk2
=
qi2
*
atomJ
.
inducedDipole
[
0
]
+
qi5
*
atomJ
.
inducedDipole
[
1
]
+
qi6
*
atomJ
.
inducedDipole
[
2
];
float
qkui2
=
qk2
*
atomI
.
inducedDipole
[
0
]
+
qk5
*
atomI
.
inducedDipole
[
1
]
+
qk6
*
atomI
.
inducedDipole
[
2
];
float
qiukp2
=
qi2
*
atomJ
.
inducedDipoleP
[
0
]
+
qi5
*
atomJ
.
inducedDipoleP
[
1
]
+
qi6
*
atomJ
.
inducedDipoleP
[
2
];
float
qkuip2
=
qk2
*
atomI
.
inducedDipoleP
[
0
]
+
qk5
*
atomI
.
inducedDipoleP
[
1
]
+
qk6
*
atomI
.
inducedDipoleP
[
2
];
forceTorqueEnergy
->
y
+=
bn2
*
(
qkui2
+
qkuip2
-
qiuk2
-
qiukp2
);
forceTorqueEnergy
->
y
-=
rr5
*
((
qkui2
-
qiuk2
)
*
psc5
+
(
qkuip2
-
qiukp2
)
*
dsc5
);
float
qiuk3
=
qi3
*
atomJ
.
inducedDipole
[
0
]
+
qi6
*
atomJ
.
inducedDipole
[
1
]
+
qi9
*
atomJ
.
inducedDipole
[
2
];
float
qkui3
=
qk3
*
atomI
.
inducedDipole
[
0
]
+
qk6
*
atomI
.
inducedDipole
[
1
]
+
qk9
*
atomI
.
inducedDipole
[
2
];
float
qiukp3
=
qi3
*
atomJ
.
inducedDipoleP
[
0
]
+
qi6
*
atomJ
.
inducedDipoleP
[
1
]
+
qi9
*
atomJ
.
inducedDipoleP
[
2
];
float
qkuip3
=
qk3
*
atomI
.
inducedDipoleP
[
0
]
+
qk6
*
atomI
.
inducedDipoleP
[
1
]
+
qk9
*
atomI
.
inducedDipoleP
[
2
];
forceTorqueEnergy
->
z
+=
bn2
*
(
qkui3
+
qkuip3
-
qiuk3
-
qiukp3
);
forceTorqueEnergy
->
z
-=
rr5
*
((
qkui3
-
qiuk3
)
*
psc5
+
(
qkuip3
-
qiukp3
)
*
dsc5
);
float
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
// calculate the scalar products for induced components
float
sci3
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
scip3
=
atomI
.
inducedDipoleP
[
0
]
*
xr
+
atomI
.
inducedDipoleP
[
1
]
*
yr
+
atomI
.
inducedDipoleP
[
2
]
*
zr
;
float
sci4
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
scip4
=
atomJ
.
inducedDipoleP
[
0
]
*
xr
+
atomJ
.
inducedDipoleP
[
1
]
*
yr
+
atomJ
.
inducedDipoleP
[
2
]
*
zr
;
float
usc5
=
1.0
f
-
scale5
*
scalingFactors
[
UScaleIndex
];
float
prefactor1
=
0.5
f
*
(
bn2
-
rr5
*
usc5
);
forceTorqueEnergy
->
x
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
])
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]
);
forceTorqueEnergy
->
y
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
])
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]
);
forceTorqueEnergy
->
z
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
])
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]
);
float
dsc7
=
1.0
f
-
scale7
*
scalingFactors
[
DScaleIndex
];
float
psc7
=
1.0
f
-
scale7
*
scalingFactors
[
PScaleIndex
];
float
gfi5
=
bn3
*
(
sci4
+
scip4
)
-
rr7
*
(
sci4
*
psc7
+
scip4
*
dsc7
);
forceTorqueEnergy
->
x
+=
gfi5
*
qir1
;
forceTorqueEnergy
->
y
+=
gfi5
*
qir2
;
forceTorqueEnergy
->
z
+=
gfi5
*
qir3
;
prefactor1
=
0.5
f
*
(
bn2
*
(
sci4
+
scip4
)
-
rr5
*
(
sci4
*
psc5
+
scip4
*
dsc5
)
);
float
prefactor2
=
0.5
f
*
(
bn2
*
(
sci3
+
scip3
)
-
rr5
*
(
sci3
*
psc5
+
scip3
*
dsc5
)
);
forceTorqueEnergy
->
x
+=
prefactor1
*
di1
+
prefactor2
*
dk1
;
forceTorqueEnergy
->
y
+=
prefactor1
*
di2
+
prefactor2
*
dk2
;
forceTorqueEnergy
->
z
+=
prefactor1
*
di3
+
prefactor2
*
dk3
;
float
gfi6
=
-
bn3
*
(
sci3
+
scip3
)
+
rr7
*
(
sci3
*
psc7
+
scip3
*
dsc7
);
forceTorqueEnergy
->
x
+=
gfi6
*
qkr1
;
forceTorqueEnergy
->
y
+=
gfi6
*
qkr2
;
forceTorqueEnergy
->
z
+=
gfi6
*
qkr3
;
float
sci1
=
atomI
.
inducedDipole
[
0
]
*
dk1
+
atomI
.
inducedDipole
[
1
]
*
dk2
+
atomI
.
inducedDipole
[
2
]
*
dk3
+
di1
*
atomJ
.
inducedDipole
[
0
]
+
di2
*
atomJ
.
inducedDipole
[
1
]
+
di3
*
atomJ
.
inducedDipole
[
2
];
float
sci7
=
qir1
*
atomJ
.
inducedDipole
[
0
]
+
qir2
*
atomJ
.
inducedDipole
[
1
]
+
qir3
*
atomJ
.
inducedDipole
[
2
];
float
sci8
=
qkr1
*
atomI
.
inducedDipole
[
0
]
+
qkr2
*
atomI
.
inducedDipole
[
1
]
+
qkr3
*
atomI
.
inducedDipole
[
2
];
float
scip1
=
atomI
.
inducedDipoleP
[
0
]
*
dk1
+
atomI
.
inducedDipoleP
[
1
]
*
dk2
+
atomI
.
inducedDipoleP
[
2
]
*
dk3
+
di1
*
atomJ
.
inducedDipoleP
[
0
]
+
di2
*
atomJ
.
inducedDipoleP
[
1
]
+
di3
*
atomJ
.
inducedDipoleP
[
2
];
float
scip2
=
atomI
.
inducedDipole
[
0
]
*
atomJ
.
inducedDipoleP
[
0
]
+
atomI
.
inducedDipole
[
1
]
*
atomJ
.
inducedDipoleP
[
1
]
+
atomI
.
inducedDipole
[
2
]
*
atomJ
.
inducedDipoleP
[
2
]
+
atomJ
.
inducedDipole
[
0
]
*
atomI
.
inducedDipoleP
[
0
]
+
atomJ
.
inducedDipole
[
1
]
*
atomI
.
inducedDipoleP
[
1
]
+
atomJ
.
inducedDipole
[
2
]
*
atomI
.
inducedDipoleP
[
2
];
float
scip7
=
qir1
*
atomJ
.
inducedDipoleP
[
0
]
+
qir2
*
atomJ
.
inducedDipoleP
[
1
]
+
qir3
*
atomJ
.
inducedDipoleP
[
2
];
float
scip8
=
qkr1
*
atomI
.
inducedDipoleP
[
0
]
+
qkr2
*
atomI
.
inducedDipoleP
[
1
]
+
qkr3
*
atomI
.
inducedDipoleP
[
2
];
// calculate the gl functions for permanent components
// calculate the gl functions for induced components
float
gli1
=
ck
*
sci3
-
ci
*
sci4
;
float
gli2
=
-
sc3
*
sci4
-
sci3
*
sc4
;
float
gli3
=
sci3
*
sc6
-
sci4
*
sc5
;
float
gli6
=
sci1
;
float
gli7
=
2.0
f
*
(
sci7
-
sci8
);
float
glip1
=
ck
*
scip3
-
ci
*
scip4
;
float
glip2
=
-
sc3
*
scip4
-
scip3
*
sc4
;
float
glip3
=
scip3
*
sc6
-
scip4
*
sc5
;
float
glip6
=
scip1
;
float
glip7
=
2.0
f
*
(
scip7
-
scip8
);
float
ei
=
(
bn1
*
(
gli1
+
gli6
)
+
bn2
*
(
gli2
+
gli7
)
+
bn3
*
gli3
);
float
psc3
=
1.0
f
-
scale3
*
scalingFactors
[
PScaleIndex
];
ei
-=
(
rr3
*
(
gli1
+
gli6
)
*
psc3
+
rr5
*
(
gli2
+
gli7
)
*
psc5
+
rr7
*
gli3
*
psc7
);
forceTorqueEnergy
->
w
+=
0.5
f
*
ei
;
float
dsc3
=
1.0
f
-
scale3
*
scalingFactors
[
DScaleIndex
];
float
usc3
=
1.0
f
-
scale3
*
scalingFactors
[
UScaleIndex
];
float
gfi1
=
(
bn2
*
(
gli1
+
glip1
+
gli6
+
glip6
)
+
bn2
*
scip2
+
bn3
*
(
gli2
+
glip2
+
gli7
+
glip7
)
-
bn3
*
(
sci3
*
scip4
+
scip3
*
sci4
)
+
bn4
*
(
gli3
+
glip3
));
gfi1
-=
rr5
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
usc3
)
+
rr7
*
((
gli7
+
gli2
)
*
psc5
+
(
glip7
+
glip2
)
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
usc5
)
+
rr9
*
(
gli3
*
psc7
+
glip3
*
dsc7
);
gfi1
*=
0.5
f
;
forceTorqueEnergy
->
x
+=
gfi1
*
xr
;
forceTorqueEnergy
->
y
+=
gfi1
*
yr
;
forceTorqueEnergy
->
z
+=
gfi1
*
zr
;
float
gfi2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
float
gfi3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
float
ftm2i1
=
gfi2
*
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
])
+
gfi3
*
(
atomJ
.
inducedDipole
[
0
]
+
atomJ
.
inducedDipoleP
[
0
]);
float
ftm2i2
=
gfi2
*
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
])
+
gfi3
*
(
atomJ
.
inducedDipole
[
1
]
+
atomJ
.
inducedDipoleP
[
1
]);
float
ftm2i3
=
gfi2
*
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
])
+
gfi3
*
(
atomJ
.
inducedDipole
[
2
]
+
atomJ
.
inducedDipoleP
[
2
]);
forceTorqueEnergy
->
x
-=
0.5
f
*
(
-
rr3
*
ck
*
(
atomI
.
inducedDipole
[
0
]
*
psc3
+
atomI
.
inducedDipoleP
[
0
]
*
dsc3
)
+
rr5
*
sc4
*
(
atomI
.
inducedDipole
[
0
]
*
psc5
+
atomI
.
inducedDipoleP
[
0
]
*
dsc5
)
-
rr7
*
sc6
*
(
atomI
.
inducedDipole
[
0
]
*
psc7
+
atomI
.
inducedDipoleP
[
0
]
*
dsc7
)
+
rr3
*
ci
*
(
atomJ
.
inducedDipole
[
0
]
*
psc3
+
atomJ
.
inducedDipoleP
[
0
]
*
dsc3
)
+
rr5
*
sc3
*
(
atomJ
.
inducedDipole
[
0
]
*
psc5
+
atomJ
.
inducedDipoleP
[
0
]
*
dsc5
)
+
rr7
*
sc5
*
(
atomJ
.
inducedDipole
[
0
]
*
psc7
+
atomJ
.
inducedDipoleP
[
0
]
*
dsc7
));
forceTorqueEnergy
->
y
-=
0.5
f
*
(
-
rr3
*
ck
*
(
atomI
.
inducedDipole
[
1
]
*
psc3
+
atomI
.
inducedDipoleP
[
1
]
*
dsc3
)
+
rr5
*
sc4
*
(
atomI
.
inducedDipole
[
1
]
*
psc5
+
atomI
.
inducedDipoleP
[
1
]
*
dsc5
)
-
rr7
*
sc6
*
(
atomI
.
inducedDipole
[
1
]
*
psc7
+
atomI
.
inducedDipoleP
[
1
]
*
dsc7
)
+
rr3
*
ci
*
(
atomJ
.
inducedDipole
[
1
]
*
psc3
+
atomJ
.
inducedDipoleP
[
1
]
*
dsc3
)
+
rr5
*
sc3
*
(
atomJ
.
inducedDipole
[
1
]
*
psc5
+
atomJ
.
inducedDipoleP
[
1
]
*
dsc5
)
+
rr7
*
sc5
*
(
atomJ
.
inducedDipole
[
1
]
*
psc7
+
atomJ
.
inducedDipoleP
[
1
]
*
dsc7
));
forceTorqueEnergy
->
z
-=
0.5
f
*
(
-
rr3
*
ck
*
(
atomI
.
inducedDipole
[
2
]
*
psc3
+
atomI
.
inducedDipoleP
[
2
]
*
dsc3
)
+
rr5
*
sc4
*
(
atomI
.
inducedDipole
[
2
]
*
psc5
+
atomI
.
inducedDipoleP
[
2
]
*
dsc5
)
-
rr7
*
sc6
*
(
atomI
.
inducedDipole
[
2
]
*
psc7
+
atomI
.
inducedDipoleP
[
2
]
*
dsc7
)
+
rr3
*
ci
*
(
atomJ
.
inducedDipole
[
2
]
*
psc3
+
atomJ
.
inducedDipoleP
[
2
]
*
dsc3
)
+
rr5
*
sc3
*
(
atomJ
.
inducedDipole
[
2
]
*
psc5
+
atomJ
.
inducedDipoleP
[
2
]
*
dsc5
)
+
rr7
*
sc5
*
(
atomJ
.
inducedDipole
[
2
]
*
psc7
+
atomJ
.
inducedDipoleP
[
2
]
*
dsc7
));
if
(
damp
!=
0.0
f
){
float
expdamp
=
exp
(
damp
);
float
temp3
=
-
3.0
f
*
damp
*
expdamp
*
rr2
;
float
temp5
=
-
damp
;
float
temp7
=
-
0.2
f
-
0.6
f
*
damp
;
float
ddsc31
=
temp3
*
xr
;
float
ddsc32
=
temp3
*
yr
;
float
ddsc33
=
temp3
*
zr
;
float
ddsc51
=
temp5
*
ddsc31
;
float
ddsc52
=
temp5
*
ddsc32
;
float
ddsc53
=
temp5
*
ddsc33
;
float
ddsc71
=
temp7
*
ddsc51
;
float
ddsc72
=
temp7
*
ddsc52
;
float
ddsc73
=
temp7
*
ddsc53
;
temp3
=
rr3
*
((
gli1
+
gli6
)
*
scalingFactors
[
PScaleIndex
]
+
(
glip1
+
glip6
)
*
scalingFactors
[
DScaleIndex
]);
temp5
=
rr5
*
((
gli2
+
gli7
)
*
scalingFactors
[
PScaleIndex
]
+
(
glip2
+
glip7
)
*
scalingFactors
[
DScaleIndex
]);
temp7
=
rr7
*
(
gli3
*
scalingFactors
[
PScaleIndex
]
+
glip3
*
scalingFactors
[
DScaleIndex
]);
ftm2i1
-=
(
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
);
ftm2i2
-=
(
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
);
ftm2i3
-=
(
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
);
if
(
cAmoebaSim
.
polarizationType
==
0
){
temp3
=
rr3
*
scalingFactors
[
UScaleIndex
]
*
scip2
;
temp5
=
-
rr5
*
scalingFactors
[
UScaleIndex
]
*
(
sci3
*
scip4
+
scip3
*
sci4
);
ftm2i1
-=
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm2i2
-=
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm2i3
-=
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
}
}
if
(
cAmoebaSim
.
polarizationType
){
float
gfd
=
(
bn2
*
scip2
-
bn3
*
(
scip3
*
sci4
+
sci3
*
scip4
))
-
(
rr5
*
scip2
*
usc3
-
rr7
*
(
scip3
*
sci4
+
sci3
*
scip4
)
*
usc5
);
float
p1
=
(
bn2
-
usc5
*
rr5
);
ftm2i1
-=
gfd
*
xr
+
p1
*
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
]
+
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]);
ftm2i2
-=
gfd
*
yr
+
p1
*
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
]
+
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]);
ftm2i3
-=
gfd
*
zr
+
p1
*
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
]
+
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]);
}
forceTorqueEnergy
->
x
+=
0.5
f
*
ftm2i1
;
forceTorqueEnergy
->
y
+=
0.5
f
*
ftm2i2
;
forceTorqueEnergy
->
z
+=
0.5
f
*
ftm2i3
;
return
;
}
static
__device__
void
calculatePmeDirectElectrostaticPairIxnT1_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float4
delta
,
const
float4
bn
,
const
float
*
scalingFactors
,
float4
*
forceTorqueEnergy
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
float
r
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
5
];
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
5
];
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
qii
*=
2.0
f
;
float
rr1
=
1.0
f
/
r
;
float
uii
=
atomI
.
labFrameDipole
[
0
]
*
atomI
.
inducedDipole
[
0
]
+
atomI
.
labFrameDipole
[
1
]
*
atomI
.
inducedDipole
[
1
]
+
atomI
.
labFrameDipole
[
2
]
*
atomI
.
inducedDipole
[
2
];
float
rr2
=
rr1
*
rr1
;
float
rr3
=
rr1
*
rr2
;
float
rr5
=
3.0
f
*
rr3
*
rr2
;
float
rr7
=
5.0
f
*
rr5
*
rr2
;
float
rr9
=
7.0
f
*
rr7
*
rr2
;
float
scale
=
1.0
f
-
scalingFactors
[
MScaleIndex
];
float
prefactor
=
scale
*
rr3
-
bn1
;
float
dixdk1
=
di2
*
dk3
-
di3
*
dk2
;
float
ttm21
=
prefactor
*
dixdk1
;
float
dixdk2
=
di3
*
dk1
-
di1
*
dk3
;
float
ttm22
=
prefactor
*
dixdk2
;
float
dixdk3
=
di1
*
dk2
-
di2
*
dk1
;
float
ttm23
=
prefactor
*
dixdk3
;
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
float
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
float
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
float
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
float
rxqikr1
=
yr
*
qiqkr3
-
zr
*
qiqkr2
;
float
qkrxqir1
=
qkr2
*
qir3
-
qkr3
*
qir2
;
prefactor
=
4.0
f
*
(
bn3
-
scale
*
rr7
);
ttm21
-=
prefactor
*
(
rxqikr1
+
qkrxqir1
);
float
rxqikr2
=
zr
*
qiqkr1
-
xr
*
qiqkr3
;
float
qkrxqir2
=
qkr3
*
qir1
-
qkr1
*
qir3
;
ttm22
-=
prefactor
*
(
rxqikr2
+
qkrxqir2
);
float
rxqikr3
=
xr
*
qiqkr2
-
yr
*
qiqkr1
;
float
qkrxqir3
=
qkr1
*
qir2
-
qkr2
*
qir1
;
ttm23
-=
prefactor
*
(
rxqikr3
+
qkrxqir3
);
float
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
float
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
float
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
float
dixqkr1
=
di2
*
qkr3
-
di3
*
qkr2
;
float
dkxqir1
=
dk2
*
qir3
-
dk3
*
qir2
;
float
rxqidk1
=
yr
*
qidk3
-
zr
*
qidk2
;
float
qixqk1
=
qi2
*
qk3
+
qi5
*
qk6
+
qi6
*
qk9
-
qi3
*
qk2
-
qi6
*
qk5
-
qi9
*
qk6
;
prefactor
=
2.0
f
*
(
bn2
-
scale
*
rr5
);
ttm21
+=
prefactor
*
(
dixqkr1
+
dkxqir1
+
rxqidk1
-
2.0
f
*
qixqk1
);
float
dixqkr2
=
di3
*
qkr1
-
di1
*
qkr3
;
float
dkxqir2
=
dk3
*
qir1
-
dk1
*
qir3
;
float
rxqidk2
=
zr
*
qidk1
-
xr
*
qidk3
;
float
qixqk2
=
qi3
*
qk1
+
qi6
*
qk2
+
qi9
*
qk3
-
qi1
*
qk3
-
qi2
*
qk6
-
qi3
*
qk9
;
ttm22
+=
prefactor
*
(
dixqkr2
+
dkxqir2
+
rxqidk2
-
2.0
f
*
qixqk2
);
float
dixqkr3
=
di1
*
qkr2
-
di2
*
qkr1
;
float
dkxqir3
=
dk1
*
qir2
-
dk2
*
qir1
;
float
rxqidk3
=
xr
*
qidk2
-
yr
*
qidk1
;
float
qixqk3
=
qi1
*
qk2
+
qi2
*
qk5
+
qi3
*
qk6
-
qi2
*
qk1
-
qi5
*
qk2
-
qi6
*
qk3
;
ttm23
+=
prefactor
*
(
dixqkr3
+
dkxqir3
+
rxqidk3
-
2.0
f
*
qixqk3
);
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
float
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
float
gfr2
=
-
ck
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
;
prefactor
=
(
gf2
-
scale
*
gfr2
);
ttm21
+=
prefactor
*
(
di2
*
zr
-
di3
*
yr
);
ttm22
+=
prefactor
*
(
di3
*
xr
-
di1
*
zr
);
ttm23
+=
prefactor
*
(
di1
*
yr
-
di2
*
xr
);
float
gf5
=
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
);
float
gfr5
=
(
-
ck
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
);
prefactor
=
2.0
f
*
(
gf5
-
scale
*
gfr5
);
float
rxqir1
=
yr
*
qir3
-
zr
*
qir2
;
float
rxqir2
=
zr
*
qir1
-
xr
*
qir3
;
float
rxqir3
=
xr
*
qir2
-
yr
*
qir1
;
ttm21
-=
prefactor
*
rxqir1
;
ttm22
-=
prefactor
*
rxqir2
;
ttm23
-=
prefactor
*
rxqir3
;
forceTorqueEnergy
->
x
=
ttm21
;
forceTorqueEnergy
->
y
=
ttm22
;
forceTorqueEnergy
->
z
=
ttm23
;
return
;
float
energy
=
(
cii
+
term
*
(
dii
/
3.0
f
+
2.0
f
*
term
*
qii
/
5.0
f
));
energy
+=
term
*
uii
/
3.0
f
;
energy
*=
fterm
;
*
totalEnergy
+=
energy
;
}
}
static
__device__
void
calculatePmeDirectElectrostaticPairIxnT2_kernel
(
// self-torque for PME
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float4
delta
,
const
float4
bn
,
const
float
*
scalingFactors
,
float4
*
forceTorqueEnergy
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
float
r
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
5
];
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
// apply Thole polarization damping to scale factors
float
rr1
=
1.0
f
/
r
;
float
rr2
=
rr1
*
rr1
;
float
rr3
=
rr1
*
rr2
;
float
rr5
=
3.0
f
*
rr3
*
rr2
;
float
rr7
=
5.0
f
*
rr5
*
rr2
;
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
ratio
=
r
/
damp
;
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
if
(
damp
>
-
50.0
f
){
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
(
1.0
f
-
damp
)
*
expdamp
;
scale7
=
1.0
f
-
(
1.0
f
-
damp
+
0.6
f
*
damp
*
damp
)
*
expdamp
;
}
}
float
dsc3
=
1.0
f
-
scale3
*
scalingFactors
[
DScaleIndex
];
__device__
static
void
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
)
float
dsc5
=
1.0
f
-
scale5
*
scalingFactors
[
DScaleIndex
];
{
float
dsc7
=
1.0
f
-
scale7
*
scalingFactors
[
DScaleIndex
];
float
term
=
(
2.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
float
psc3
=
1.0
f
-
scale3
*
scalingFactors
[
PScaleIndex
];
float
psc5
=
1.0
f
-
scale5
*
scalingFactors
[
PScaleIndex
];
float
psc7
=
1.0
f
-
scale7
*
scalingFactors
[
PScaleIndex
];
float
prefactor1
=
0.5
f
*
(
psc3
*
rr3
-
bn1
);
float
prefactor2
=
0.5
f
*
(
dsc3
*
rr3
-
bn1
);
float
dixuk1
=
di2
*
atomJ
.
inducedDipole
[
2
]
-
di3
*
atomJ
.
inducedDipole
[
1
];
float
dixukp1
=
di2
*
atomJ
.
inducedDipoleP
[
2
]
-
di3
*
atomJ
.
inducedDipoleP
[
1
];
float
ttm2i1
=
prefactor1
*
dixuk1
+
prefactor2
*
dixukp1
;
float
dixuk2
=
di3
*
atomJ
.
inducedDipole
[
0
]
-
di1
*
atomJ
.
inducedDipole
[
2
];
float
dixukp2
=
di3
*
atomJ
.
inducedDipoleP
[
0
]
-
di1
*
atomJ
.
inducedDipoleP
[
2
];
float
ttm2i2
=
prefactor1
*
dixuk2
+
prefactor2
*
dixukp2
;
float
dixuk3
=
di1
*
atomJ
.
inducedDipole
[
1
]
-
di2
*
atomJ
.
inducedDipole
[
0
];
float
dixukp3
=
di1
*
atomJ
.
inducedDipoleP
[
1
]
-
di2
*
atomJ
.
inducedDipoleP
[
0
];
float
ttm2i3
=
prefactor1
*
dixuk3
+
prefactor2
*
dixukp3
;
float
sci4
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
scip4
=
atomJ
.
inducedDipoleP
[
0
]
*
xr
+
atomJ
.
inducedDipoleP
[
1
]
*
yr
+
atomJ
.
inducedDipoleP
[
2
]
*
zr
;
float
gti2
=
bn2
*
(
sci4
+
scip4
);
float
gtri2
=
rr5
*
(
sci4
*
psc5
+
scip4
*
dsc5
);
prefactor1
=
0.5
f
*
(
gti2
-
gtri2
);
ttm2i1
+=
prefactor1
*
(
di2
*
zr
-
di3
*
yr
);
ttm2i2
+=
prefactor1
*
(
di3
*
xr
-
di1
*
zr
);
ttm2i3
+=
prefactor1
*
(
di1
*
yr
-
di2
*
xr
);
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
prefactor1
=
rr7
*
(
sci4
*
psc7
+
scip4
*
dsc7
)
-
bn3
*
(
sci4
+
scip4
);
ttm2i1
+=
prefactor1
*
(
yr
*
qir3
-
zr
*
qir2
);
ttm2i2
+=
prefactor1
*
(
zr
*
qir1
-
xr
*
qir3
);
ttm2i3
+=
prefactor1
*
(
xr
*
qir2
-
yr
*
qir1
);
float
qiuk1
=
qi1
*
atomJ
.
inducedDipole
[
0
]
+
qi2
*
atomJ
.
inducedDipole
[
1
]
+
qi3
*
atomJ
.
inducedDipole
[
2
];
float
qiuk2
=
qi2
*
atomJ
.
inducedDipole
[
0
]
+
qi5
*
atomJ
.
inducedDipole
[
1
]
+
qi6
*
atomJ
.
inducedDipole
[
2
];
float
qiuk3
=
qi3
*
atomJ
.
inducedDipole
[
0
]
+
qi6
*
atomJ
.
inducedDipole
[
1
]
+
qi9
*
atomJ
.
inducedDipole
[
2
];
float
qiukp1
=
qi1
*
atomJ
.
inducedDipoleP
[
0
]
+
qi2
*
atomJ
.
inducedDipoleP
[
1
]
+
qi3
*
atomJ
.
inducedDipoleP
[
2
];
float
qiukp2
=
qi2
*
atomJ
.
inducedDipoleP
[
0
]
+
qi5
*
atomJ
.
inducedDipoleP
[
1
]
+
qi6
*
atomJ
.
inducedDipoleP
[
2
];
float
qiukp3
=
qi3
*
atomJ
.
inducedDipoleP
[
0
]
+
qi6
*
atomJ
.
inducedDipoleP
[
1
]
+
qi9
*
atomJ
.
inducedDipoleP
[
2
];
prefactor1
=
(
bn2
-
rr5
*
psc5
);
prefactor2
=
(
bn2
-
rr5
*
dsc5
);
float
ukxqir1
=
atomJ
.
inducedDipole
[
1
]
*
qir3
-
atomJ
.
inducedDipole
[
2
]
*
qir2
;
float
ukxqirp1
=
atomJ
.
inducedDipoleP
[
1
]
*
qir3
-
atomJ
.
inducedDipoleP
[
2
]
*
qir2
;
float
rxqiuk1
=
yr
*
qiuk3
-
zr
*
qiuk2
;
float
rxqiukp1
=
yr
*
qiukp3
-
zr
*
qiukp2
;
ttm2i1
+=
prefactor1
*
(
ukxqir1
+
rxqiuk1
)
+
prefactor2
*
(
ukxqirp1
+
rxqiukp1
);
float
ukxqir2
=
atomJ
.
inducedDipole
[
2
]
*
qir1
-
atomJ
.
inducedDipole
[
0
]
*
qir3
;
float
ukxqirp2
=
atomJ
.
inducedDipoleP
[
2
]
*
qir1
-
atomJ
.
inducedDipoleP
[
0
]
*
qir3
;
float
rxqiuk2
=
zr
*
qiuk1
-
xr
*
qiuk3
;
float
rxqiukp2
=
zr
*
qiukp1
-
xr
*
qiukp3
;
ttm2i2
+=
prefactor1
*
(
ukxqir2
+
rxqiuk2
)
+
prefactor2
*
(
ukxqirp2
+
rxqiukp2
);
float
ukxqir3
=
atomJ
.
inducedDipole
[
0
]
*
qir2
-
atomJ
.
inducedDipole
[
1
]
*
qir1
;
float
ukxqirp3
=
atomJ
.
inducedDipoleP
[
0
]
*
qir2
-
atomJ
.
inducedDipoleP
[
1
]
*
qir1
;
float
rxqiuk3
=
xr
*
qiuk2
-
yr
*
qiuk1
;
float
rxqiukp3
=
xr
*
qiukp2
-
yr
*
qiukp1
;
ttm2i3
+=
prefactor1
*
(
ukxqir3
+
rxqiuk3
)
+
prefactor2
*
(
ukxqirp3
+
rxqiukp3
);
forceTorqueEnergy
->
x
+=
ttm2i1
;
forceTorqueEnergy
->
y
+=
ttm2i2
;
forceTorqueEnergy
->
z
+=
ttm2i3
;
return
;
float
uix
=
(
atomI
.
inducedDipole
[
0
]
+
atomI
.
inducedDipoleP
[
0
]);
float
uiy
=
(
atomI
.
inducedDipole
[
1
]
+
atomI
.
inducedDipoleP
[
1
]);
float
uiz
=
(
atomI
.
inducedDipole
[
2
]
+
atomI
.
inducedDipoleP
[
2
]);
atomI
.
torque
[
0
]
+=
term
*
(
atomI
.
labFrameDipole
[
1
]
*
uiz
-
atomI
.
labFrameDipole
[
2
]
*
uiy
);
atomI
.
torque
[
1
]
+=
term
*
(
atomI
.
labFrameDipole
[
2
]
*
uix
-
atomI
.
labFrameDipole
[
0
]
*
uiz
);
atomI
.
torque
[
2
]
+=
term
*
(
atomI
.
labFrameDipole
[
0
]
*
uiy
-
atomI
.
labFrameDipole
[
1
]
*
uix
);
}
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
__device__
void
calculateBn_kernel
(
float
r
,
float4
*
bn
,
float
*
bn0
,
float
*
bn5
){
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
){
float4
delta
;
delta
.
x
=
atomJ
.
x
-
atomI
.
x
;
delta
.
y
=
atomJ
.
y
-
atomI
.
y
;
delta
.
z
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
delta
.
x
-=
floor
(
delta
.
x
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
delta
.
y
-=
floor
(
delta
.
y
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
delta
.
z
-=
floor
(
delta
.
z
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
cSim
.
nonbondedCutoffSqr
||
delta
.
w
==
0.0
f
){
forceTorqueEnergy
[
0
].
x
=
forceTorqueEnergy
[
0
].
y
=
forceTorqueEnergy
[
0
].
z
=
forceTorqueEnergy
[
0
].
w
=
0.0
f
;
forceTorqueEnergy
[
1
].
x
=
forceTorqueEnergy
[
1
].
y
=
forceTorqueEnergy
[
1
].
z
=
0.0
f
;
forceTorqueEnergy
[
2
].
x
=
forceTorqueEnergy
[
2
].
y
=
forceTorqueEnergy
[
2
].
z
=
0.0
f
;
return
;
}
delta
.
w
=
sqrt
(
delta
.
w
);
float
r
=
delta
.
w
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
float
alsq2n
=
0.0
f
;
if
(
cSim
.
alphaEwald
>
0.0
f
){
if
(
cSim
.
alphaEwald
>
0.0
f
){
...
@@ -935,59 +175,45 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( const PmeDirectEl
...
@@ -935,59 +175,45 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( const PmeDirectEl
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
rr1
=
1.0
f
/
r
;
float
rr1
=
1.0
f
/
r
;
float
bn0
=
erfc
(
ralpha
)
*
rr1
;
*
bn0
=
erfc
(
ralpha
)
*
rr1
;
float
rr2
=
rr1
*
rr1
;
float
rr2
=
rr1
*
rr1
;
forceTorqueEnergy
[
0
].
w
=
atomI
.
q
*
atomJ
.
q
*
bn0
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
float4
bn
;
bn
->
x
=
(
*
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
bn
.
x
=
(
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
.
y
=
(
3.0
f
*
bn
.
x
+
alsq2n
*
exp2a
)
*
rr2
;
bn
->
y
=
(
3.0
f
*
bn
->
x
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
.
z
=
(
5.0
f
*
bn
.
y
+
alsq2n
*
exp2a
)
*
rr2
;
bn
->
z
=
(
5.0
f
*
bn
->
y
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
.
w
=
(
7.0
f
*
bn
.
z
+
alsq2n
*
exp2a
)
*
rr2
;
bn
->
w
=
(
7.0
f
*
bn
->
z
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
float
bn5
=
(
9.0
f
*
bn
.
w
+
alsq2n
*
exp2a
)
*
rr2
;
*
bn5
=
(
9.0
f
*
bn
->
w
+
alsq2n
*
exp2a
)
*
rr2
;
//calculatePmeDirectElectrostaticPairIxn_F1_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
calculatePmeDirectElectrostaticPairIxnF1_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
bn5
,
scalingFactors
,
forceTorqueEnergy
);
//calculatePmeDirectElectrostaticPairIxn_F2_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
calculatePmeDirectElectrostaticPairIxnF2_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
,
forceTorqueEnergy
);
//calculatePmeDirectElectrostaticPairIxn_T1_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
calculatePmeDirectElectrostaticPairIxnT1_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
,
(
forceTorqueEnergy
+
1
)
);
//calculatePmeDirectElectrostaticPairIxn_T2_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
calculatePmeDirectElectrostaticPairIxnT2_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
,
(
forceTorqueEnergy
+
1
)
);
//calculatePmeDirectElectrostaticPairIxn_T3_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
//calculatePmeDirectElectrostaticPairIxnT3_kernel( atomI, atomJ, delta, bn, scalingFactors, (forceTorqueEnergy+2) );
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta
.
x
*=
-
1.0
f
;
delta
.
y
*=
-
1.0
f
;
delta
.
z
*=
-
1.0
f
;
calculatePmeDirectElectrostaticPairIxnT1_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
,
(
forceTorqueEnergy
+
2
)
);
calculatePmeDirectElectrostaticPairIxnT2_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
,
(
forceTorqueEnergy
+
2
)
);
//calculatePmeDirectElectrostaticPairIxn_T4_kernel( atomI, atomJ, delta, scalingFactors, forceTorqueEnergy );
//calculatePmeDirectElectrostaticPairIxnT4_kernel( atomI, atomJ, delta, scalingFactors, (forceTorqueEnergy+2) );
return
;
}
}
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
#define SUB_METHOD_NAME(a, b) a##Scale##b
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
#define APPLY_SCALE
#include "kCalculateAmoebaCudaPmeDirectElectrostaticF1.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostaticT1.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostaticT2.h"
//#include "kCalculateAmoebaCudaPmeDirectElectrostaticDriver.h"
#undef APPLY_SCALE
#undef SUB_METHOD_NAME
#define SUB_METHOD_NAME(a, b) a##NoScale##b
#include "kCalculateAmoebaCudaPmeDirectElectrostaticF1.h"
#include "kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h"
//#include "kCalculateAmoebaCudaPmeDirectElectrostaticT1.h"
//#include "kCalculateAmoebaCudaPmeDirectElectrostaticT2.h"
//#include "kCalculateAmoebaCudaPmeDirectElectrostaticDriver.h"
#undef SUB_METHOD_NAME
__device__
void
calculatePmeDirectElectrostaticPairIxnOrig_kernel
(
const
PmeDirectElectrostaticParticle
&
atomI
,
const
PmeDirectElectrostaticParticle
&
atomJ
,
const
float
*
scalingFactors
,
float4
forceTorqueEnergy
[
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
float4
*
debugArray
#endif
#endif
...
@@ -1022,7 +248,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( PmeDirectElec
...
@@ -1022,7 +248,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( PmeDirectElec
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
5
];
//float qi9 = atomI.labFrameQuadrupole[5];
float
qi9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]);
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
...
@@ -1033,7 +260,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( PmeDirectElec
...
@@ -1033,7 +260,8 @@ __device__ void calculatePmeDirectElectrostaticPairIxnOrig_kernel( PmeDirectElec
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
5
];
//float qk9 = atomJ.labFrameQuadrupole[5];
float
qk9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomJ
.
labFrameQuadrupole
[
3
]);
// calculate the real space error function terms
// calculate the real space error function terms
...
@@ -1838,7 +1066,201 @@ for( int ii = 0; ii < 12; ii++ ){
...
@@ -1838,7 +1066,201 @@ for( int ii = 0; ii < 12; ii++ ){
}
}
__device__
void
loadPmeDirectElectrostaticShared
(
struct
PmeDirectElectrostaticParticle
*
sA
,
unsigned
int
atomI
)
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
bool
bExclusionFlag
,
const
float
*
scalingFactors
,
float
forceFactor
,
float
*
energy
){
float4
delta
;
delta
.
x
=
atomJ
.
x
-
atomI
.
x
;
delta
.
y
=
atomJ
.
y
-
atomI
.
y
;
delta
.
z
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
delta
.
x
-=
floor
(
delta
.
x
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
delta
.
y
-=
floor
(
delta
.
y
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
delta
.
z
-=
floor
(
delta
.
z
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
cSim
.
nonbondedCutoffSqr
){
return
;
}
float
r
=
sqrt
(
delta
.
w
);
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
if
(
cSim
.
alphaEwald
>
0.0
f
){
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
}
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
rr1
=
1.0
f
/
r
;
delta
.
w
=
rr1
;
float
bn0
=
erfc
(
ralpha
)
*
rr1
;
*
energy
+=
forceFactor
*
atomI
.
q
*
atomJ
.
q
*
bn0
;
float
rr2
=
rr1
*
rr1
;
alsq2n
*=
alsq2
;
float4
bn
;
bn
.
x
=
(
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
y
=
(
3.0
f
*
bn
.
x
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
z
=
(
5.0
f
*
bn
.
y
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
w
=
(
7.0
f
*
bn
.
z
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
float
bn5
=
(
9.0
f
*
bn
.
w
+
alsq2n
*
exp2a
)
*
rr2
;
/*
float bn0, bn5;
float4 bn;
calculateBn_kernel( r, &bn, &bn0, &bn5 );
*energy += forceFactor*atomI.q*atomJ.q*bn0;
delta.w = 1.0f/r;
*/
float
force
[
3
];
//calculatePmeDirectElectrostaticPairIxnF1Scale_kernel( atomI, atomJ, delta, bn, bn5, forceFactor, scalingFactors, force, energy );
if
(
bExclusionFlag
){
calculatePmeDirectElectrostaticPairIxnF1Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
bn5
,
forceFactor
,
scalingFactors
,
force
,
energy
);
calculatePmeDirectElectrostaticPairIxnF2Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
forceFactor
,
scalingFactors
,
force
,
energy
);
}
else
{
calculatePmeDirectElectrostaticPairIxnF1NoScale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
bn5
,
forceFactor
,
force
,
energy
);
calculatePmeDirectElectrostaticPairIxnF2NoScale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
forceFactor
,
force
,
energy
);
}
atomI
.
force
[
0
]
+=
force
[
0
];
atomI
.
force
[
1
]
+=
force
[
1
];
atomI
.
force
[
2
]
+=
force
[
2
];
if
(
forceFactor
==
1.0
f
){
atomJ
.
force
[
0
]
-=
force
[
0
];
atomJ
.
force
[
1
]
-=
force
[
1
];
atomJ
.
force
[
2
]
-=
force
[
2
];
}
calculatePmeDirectElectrostaticPairIxnT1Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
);
calculatePmeDirectElectrostaticPairIxnT2Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
);
if
(
forceFactor
==
1.0
f
){
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta
.
x
*=
-
1.0
f
;
delta
.
y
*=
-
1.0
f
;
delta
.
z
*=
-
1.0
f
;
calculatePmeDirectElectrostaticPairIxnT1Scale_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
);
calculatePmeDirectElectrostaticPairIxnT2Scale_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
);
}
return
;
}
#ifdef OLD
__device__
void
calculatePmeDirectElectrostaticPairIxnOrigg_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
bool
bExclusionFlag
,
const
float
*
scalingFactors
,
float
forceFactor
,
float
*
energy
){
float4
delta
;
delta
.
x
=
atomJ
.
x
-
atomI
.
x
;
delta
.
y
=
atomJ
.
y
-
atomI
.
y
;
delta
.
z
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
delta
.
x
-=
floor
(
delta
.
x
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
delta
.
y
-=
floor
(
delta
.
y
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
delta
.
z
-=
floor
(
delta
.
z
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
cSim
.
nonbondedCutoffSqr
){
return
;
}
float
r
=
sqrt
(
delta
.
w
);
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
0.0
f
;
if
(
cSim
.
alphaEwald
>
0.0
f
){
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
}
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
rr1
=
1.0
f
/
r
;
delta
.
w
=
rr1
;
float
bn0
=
erfc
(
ralpha
)
*
rr1
;
*
energy
+=
forceFactor
*
atomI
.
q
*
atomJ
.
q
*
bn0
;
float
rr2
=
rr1
*
rr1
;
alsq2n
*=
alsq2
;
float4
bn
;
bn
.
x
=
(
bn0
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
y
=
(
3.0
f
*
bn
.
x
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
z
=
(
5.0
f
*
bn
.
y
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
bn
.
w
=
(
7.0
f
*
bn
.
z
+
alsq2n
*
exp2a
)
*
rr2
;
alsq2n
*=
alsq2
;
float
bn5
=
(
9.0
f
*
bn
.
w
+
alsq2n
*
exp2a
)
*
rr2
;
float
force
[
3
];
//calculatePmeDirectElectrostaticPairIxnF1Scale_kernel( atomI, atomJ, delta, bn, bn5, forceFactor, scalingFactors, force, energy );
if
(
bExclusionFlag
){
calculatePmeDirectElectrostaticPairIxnF1Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
bn5
,
forceFactor
,
scalingFactors
,
force
,
energy
);
calculatePmeDirectElectrostaticPairIxnF2Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
forceFactor
,
scalingFactors
,
force
,
energy
);
}
else
{
calculatePmeDirectElectrostaticPairIxnF1NoScale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
bn5
,
forceFactor
,
force
,
energy
);
calculatePmeDirectElectrostaticPairIxnF2NoScale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
forceFactor
,
force
,
energy
);
}
atomI
.
force
[
0
]
+=
force
[
0
];
atomI
.
force
[
1
]
+=
force
[
1
];
atomI
.
force
[
2
]
+=
force
[
2
];
if
(
forceFactor
==
1.0
f
){
atomJ
.
force
[
0
]
-=
force
[
0
];
atomJ
.
force
[
1
]
-=
force
[
1
];
atomJ
.
force
[
2
]
-=
force
[
2
];
}
calculatePmeDirectElectrostaticPairIxnT1Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
);
calculatePmeDirectElectrostaticPairIxnT2Scale_kernel
(
atomI
,
atomJ
,
delta
,
bn
,
scalingFactors
);
if
(
forceFactor
==
1.0
f
){
// T3 == T1 w/ particles I and J reversed
// T4 == T2 w/ particles I and J reversed
delta
.
x
*=
-
1.0
f
;
delta
.
y
*=
-
1.0
f
;
delta
.
z
*=
-
1.0
f
;
calculatePmeDirectElectrostaticPairIxnT1Scale_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
);
calculatePmeDirectElectrostaticPairIxnT2Scale_kernel
(
atomJ
,
atomI
,
delta
,
bn
,
scalingFactors
);
}
return
;
}
#endif
__device__
void
loadPmeDirectElectrostaticParticle
(
unsigned
int
atomI
,
struct
PmeDirectElectrostaticParticle
*
sA
)
{
{
// coordinates & charge
// coordinates & charge
float4
posq
=
cSim
.
pPosq
[
atomI
];
float4
posq
=
cSim
.
pPosq
[
atomI
];
...
@@ -1853,7 +1275,6 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
...
@@ -1853,7 +1275,6 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
sA
->
labFrameDipole
[
1
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
1
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole
[
2
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
2
];
sA
->
labFrameDipole
[
2
]
=
cAmoebaSim
.
pLabFrameDipole
[
atomI
*
3
+
2
];
// lab quadrupole
// lab quadrupole
sA
->
labFrameQuadrupole
[
0
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole
[
0
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
];
...
@@ -1861,7 +1282,11 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
...
@@ -1861,7 +1282,11 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
sA
->
labFrameQuadrupole
[
2
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
2
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole
[
3
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
3
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole
[
4
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
4
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole
[
5
]
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
8
];
// traceless tensor
//sA->labFrameQuadrupole[5] = cAmoebaSim.pLabFrameQuadrupole[atomI*9+8];
//sA->labFrameQuadrupole[5] = -(sA->labFrameQuadrupole[0] + sA->labFrameQuadrupole[3]);
// induced dipole
// induced dipole
...
@@ -1881,6 +1306,19 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
...
@@ -1881,6 +1306,19 @@ __device__ void loadPmeDirectElectrostaticShared( struct PmeDirectElectrostaticP
}
}
__device__
void
zeroPmeDirectElectrostaticParticle
(
struct
PmeDirectElectrostaticParticle
*
sA
)
{
sA
->
force
[
0
]
=
0.0
f
;
sA
->
force
[
1
]
=
0.0
f
;
sA
->
force
[
2
]
=
0.0
f
;
sA
->
torque
[
0
]
=
0.0
f
;
sA
->
torque
[
1
]
=
0.0
f
;
sA
->
torque
[
2
]
=
0.0
f
;
}
// Include versions of the kernels for N^2 calculations.
// Include versions of the kernels for N^2 calculations.
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef USE_OUTPUT_BUFFER_PER_WARP
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
4a1e9683
...
@@ -34,17 +34,7 @@ __launch_bounds__(128, 1)
...
@@ -34,17 +34,7 @@ __launch_bounds__(128, 1)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectElectrostatic
,
Forces_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectElectrostatic
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float
*
outputTorque
){
unsigned
int
*
workUnit
,
float
*
outputTorque
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
unsigned
int
targetAtom
#endif
){
#ifdef AMOEBA_DEBUG
int
maxPullIndex
=
7
;
float4
pullBack
[
12
];
#endif
extern
__shared__
PmeDirectElectrostaticParticle
sA
[];
extern
__shared__
PmeDirectElectrostaticParticle
sA
[];
...
@@ -55,7 +45,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
...
@@ -55,7 +45,6 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float
totalEnergy
=
0
.
0
f
;
float
totalEnergy
=
0
.
0
f
;
float4
forceTorqueEnergy
[
3
];
float
scalingFactors
[
LastScalingIndex
];
float
scalingFactors
[
LastScalingIndex
];
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
...
@@ -74,174 +63,89 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
...
@@ -74,174 +63,89 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
decodeCell
(
workUnit
[
pos
],
&
x
,
&
y
,
&
bExclusionFlag
);
decodeCell
(
workUnit
[
pos
],
&
x
,
&
y
,
&
bExclusionFlag
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
PmeDirectElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
PmeDirectElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
PmeDirectElectrostaticParticle
localParticle
;
PmeDirectElectrostaticParticle
localParticle
I
;
loadPmeDirectElectrostatic
Shared
(
&
localParticle
,
atom
I
);
loadPmeDirectElectrostatic
Particle
(
atomI
,
&
localParticleI
);
localParticle
.
force
[
0
]
=
0
.
0
f
;
zeroPmeDirectElectrostaticParticle
(
&
localParticleI
);
localParticle
.
force
[
1
]
=
0
.
0
f
;
scalingFactors
[
UScaleIndex
]
=
1
.
0
f
;
localParticle
.
force
[
2
]
=
0
.
0
f
;
localParticle
.
torque
[
0
]
=
0
.
0
f
;
localParticle
.
torque
[
1
]
=
0
.
0
f
;
localParticle
.
torque
[
2
]
=
0
.
0
f
;
scalingFactors
[
UScaleIndex
]
=
1
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
{
// load shared data
// load shared data
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
loadPmeDirectElectrostaticParticle
(
atomI
,
&
(
sA
[
threadIdx
.
x
])
);
if
(
bExclusionFlag
)
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
dScaleMask
=
cAmoebaSim
.
pD_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
pScaleMask
=
cAmoebaSim
.
pP_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
mScaleMask
=
cAmoebaSim
.
pM_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
}
else
{
scalingFactors
[
DScaleIndex
]
=
scalingFactors
[
PScaleIndex
]
=
scalingFactors
[
MScaleIndex
]
=
1
.
0
f
;
}
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
unsigned
int
atomJ
=
y
+
j
;
// set scale factors
if
(
atomI
<
cSim
.
atoms
){
if
(
bExclusionFlag
)
if
(
bExclusionFlag
)
{
{
getMaskedDScaleFactor
(
j
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
unsigned
int
xi
=
x
>>
GRIDBITS
;
getMaskedPScaleFactor
(
j
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
getMaskedMScaleFactor
(
j
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
dScaleMask
=
cAmoebaSim
.
pD_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
pScaleMask
=
cAmoebaSim
.
pP_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
mScaleMask
=
cAmoebaSim
.
pM_ScaleIndices
[
cAmoebaSim
.
pScaleIndicesIndex
[
cell
]
+
tgx
];
}
else
{
scalingFactors
[
DScaleIndex
]
=
scalingFactors
[
PScaleIndex
]
=
scalingFactors
[
MScaleIndex
]
=
1
.
0
f
;
}
}
// force
for
(
unsigned
int
j
=
0
;
j
<
GRID
&&
(
y
+
j
)
<
cSim
.
atoms
;
j
++
)
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
j
],
scalingFactors
,
forceTorqueEnergy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag
if
(
(
atomI
!=
atomJ
)
&&
(
atomI
<
cSim
.
atoms
)
&&
(
atomJ
<
cSim
.
atoms
)
)
{
{
localParticle
.
force
[
0
]
+=
forceTorqueEnergy
[
0
].
x
;
localParticle
.
force
[
1
]
+=
forceTorqueEnergy
[
0
].
y
;
localParticle
.
force
[
2
]
+=
forceTorqueEnergy
[
0
].
z
;
localParticle
.
torque
[
0
]
+=
forceTorqueEnergy
[
1
].
x
;
if
(
atomI
!=
(
y
+
j
)
)
localParticle
.
torque
[
1
]
+=
forceTorqueEnergy
[
1
].
y
;
{
localParticle
.
torque
[
2
]
+=
forceTorqueEnergy
[
1
].
z
;
if
(
bExclusionFlag
)
{
getMaskedDScaleFactor
(
j
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
getMaskedPScaleFactor
(
j
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
getMaskedMScaleFactor
(
j
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
}
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticleI
,
psA
[
j
],
bExclusionFlag
,
scalingFactors
,
0
.
5
f
,
&
totalEnergy
);
}
}
// end of j-loop
// include self energy and self torque
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
localParticleI
);
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
localParticleI
,
&
totalEnergy
);
localParticleI
.
force
[
0
]
*=
conversionFactor
;
localParticleI
.
force
[
1
]
*=
conversionFactor
;
localParticleI
.
force
[
2
]
*=
conversionFactor
;
localParticleI
.
torque
[
0
]
*=
-
conversionFactor
;
localParticleI
.
torque
[
1
]
*=
-
conversionFactor
;
localParticleI
.
torque
[
2
]
*=
-
conversionFactor
;
// Write results
// energy for each diagonal-block ixn included twice, hence factor of 0.5
totalEnergy
+=
0
.
5
*
forceTorqueEnergy
[
0
].
w
;
}
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
mask
=
(
(
atomI
==
atomJ
)
||
(
atomI
>=
cSim
.
atoms
)
||
(
atomJ
>=
cSim
.
atoms
)
)
?
0
:
1
;
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
float
blockId
=
1
.
0
f
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
blockId
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
forceTorqueEnergy
[
0
].
w
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
1
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
1
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
1
].
z
:
0
.
0
f
;
float
offsetF
=
(
float
)(
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
2
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
2
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
2
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
offsetF
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
}
// end of j-loop
// include self energy and self torque
if
(
atomI
<
cSim
.
atoms
){
calculatePmeSelfTorqueElectrostaticPairIxn_kernel
(
localParticle
);
float
energy
;
calculatePmeSelfEnergyElectrostaticPairIxn_kernel
(
localParticle
,
&
energy
);
totalEnergy
+=
energy
;
}
localParticle
.
force
[
0
]
*=
conversionFactor
;
localParticle
.
force
[
1
]
*=
conversionFactor
;
localParticle
.
force
[
2
]
*=
conversionFactor
;
localParticle
.
torque
[
0
]
*=
-
conversionFactor
;
localParticle
.
torque
[
1
]
*=
-
conversionFactor
;
localParticle
.
torque
[
2
]
*=
-
conversionFactor
;
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
I
.
force
,
cSim
.
pForce4
);
add3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
add3dArray
(
3
*
offset
,
localParticle
I
.
torque
,
outputTorque
);
#else
#else
unsigned
int
offset
=
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
I
.
force
,
cSim
.
pForce4
);
load3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
load3dArray
(
3
*
offset
,
localParticle
I
.
torque
,
outputTorque
);
#endif
#endif
}
}
else
{
}
else
{
if
(
lasty
!=
y
)
{
if
(
lasty
!=
y
)
{
loadPmeDirectElectrostaticParticle
(
(
y
+
tgx
),
&
(
sA
[
threadIdx
.
x
])
);
// load shared data
loadPmeDirectElectrostaticShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
}
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
cSim
.
pInteractionFlag
[
pos
]
!=
0
)
{
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
{
#ifdef CALCULATE_FULL_TILE
zeroPmeDirectElectrostaticParticle
(
&
(
sA
[
threadIdx
.
x
])
);
flags
=
0xFFFFFFFF
;
/*
#endif
sA[threadIdx.x].force[0] = 0.0f;
sA[threadIdx.x].force[0] = 0.0f;
sA[threadIdx.x].force[1] = 0.0f;
sA[threadIdx.x].force[1] = 0.0f;
sA[threadIdx.x].force[2] = 0.0f;
sA[threadIdx.x].force[2] = 0.0f;
...
@@ -249,7 +153,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -249,7 +153,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
sA[threadIdx.x].torque[0] = 0.0f;
sA[threadIdx.x].torque[0] = 0.0f;
sA[threadIdx.x].torque[1] = 0.0f;
sA[threadIdx.x].torque[1] = 0.0f;
sA[threadIdx.x].torque[2] = 0.0f;
sA[threadIdx.x].torque[2] = 0.0f;
*/
if
(
bExclusionFlag
)
if
(
bExclusionFlag
)
{
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
xi
=
x
>>
GRIDBITS
;
...
@@ -264,165 +168,49 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -264,165 +168,49 @@ if( atomI == targetAtom || atomJ == targetAtom ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
if
(
(
flags
&
(
1
<<
j
)
)
!=
0
)
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
atomJ
=
y
+
jIdx
;
// set scale factors
if
(
bExclusionFlag
)
// set scale factors and calculate force
{
getMaskedDScaleFactor
(
jIdx
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
getMaskedMScaleFactor
(
jIdx
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
}
// force
if
(
bExclusionFlag
){
getMaskedDScaleFactor
(
tj
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
getMaskedPScaleFactor
(
tj
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
getMaskedMScaleFactor
(
tj
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
}
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
// check if atoms out-of-bounds
scalingFactors
,
forceTorqueEnergy
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
// check if atoms out-of-bounds
if
(
(
atomI
<
cSim
.
atoms
)
&&
((
y
+
tj
)
<
cSim
.
atoms
)
)
{
if
(
(
atomI
<
cSim
.
atoms
)
&&
(
atomJ
<
cSim
.
atoms
)
)
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticleI
,
psA
[
tj
],
bExclusionFlag
,
scalingFactors
,
1
.
0
f
,
&
totalEnergy
);
{
}
// add force and torque to atom I due atom J
localParticle
.
force
[
0
]
+=
forceTorqueEnergy
[
0
].
x
;
localParticle
.
force
[
1
]
+=
forceTorqueEnergy
[
0
].
y
;
localParticle
.
force
[
2
]
+=
forceTorqueEnergy
[
0
].
z
;
totalEnergy
+=
forceTorqueEnergy
[
0
].
w
;
localParticle
.
torque
[
0
]
+=
forceTorqueEnergy
[
1
].
x
;
localParticle
.
torque
[
1
]
+=
forceTorqueEnergy
[
1
].
y
;
localParticle
.
torque
[
2
]
+=
forceTorqueEnergy
[
1
].
z
;
// add force and torque to atom J due atom I
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
force
[
0
]
-=
forceTorqueEnergy
[
0
].
x
;
psA
[
jIdx
].
force
[
1
]
-=
forceTorqueEnergy
[
0
].
y
;
psA
[
jIdx
].
force
[
2
]
-=
forceTorqueEnergy
[
0
].
z
;
psA
[
jIdx
].
torque
[
0
]
+=
forceTorqueEnergy
[
2
].
x
;
psA
[
jIdx
].
torque
[
1
]
+=
forceTorqueEnergy
[
2
].
y
;
psA
[
jIdx
].
torque
[
2
]
+=
forceTorqueEnergy
[
2
].
z
;
#ifndef CALCULATE_FULL_TILE
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
forceTorqueEnergy
[
0
].
x
;
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
forceTorqueEnergy
[
0
].
y
;
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
forceTorqueEnergy
[
0
].
z
;
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
forceTorqueEnergy
[
2
].
x
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
forceTorqueEnergy
[
2
].
y
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
forceTorqueEnergy
[
2
].
z
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
torque
[
0
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
#endif
}
}
// end of atoms out-of-bounds
}
// end of flags&(1<<j block
#ifdef AMOEBA_DEBUG
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
atomJ
=
y
+
jIdx
;
unsigned
int
mask
=
(
(
atomI
>=
cSim
.
atoms
)
||
(
atomJ
>=
cSim
.
atoms
)
)
?
0
:
1
;
if
(
atomI
==
targetAtom
||
atomJ
==
targetAtom
){
unsigned
int
index
=
(
atomI
==
targetAtom
)
?
atomJ
:
atomI
;
debugArray
[
index
].
x
=
(
float
)
atomI
;
debugArray
[
index
].
y
=
(
float
)
atomJ
;
debugArray
[
index
].
z
=
(
float
)
y
;
debugArray
[
index
].
w
=
(
flags
==
0xFFFFFFFF
)
?
(
float
)
-
141
.
0
f
:
-
151
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
0
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
0
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
0
].
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
forceTorqueEnergy
[
0
].
w
:
0
.
0
f
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
1
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
1
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
1
].
z
:
0
.
0
f
;
float
offsetF
=
(
float
)(
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
forceTorqueEnergy
[
2
].
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
forceTorqueEnergy
[
2
].
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
forceTorqueEnergy
[
2
].
z
:
0
.
0
f
;
offsetF
=
(
float
)
(
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
));
debugArray
[
index
].
w
=
offsetF
;
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
index
+=
cSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
pullBack
[
pullIndex
].
x
;
debugArray
[
index
].
y
=
pullBack
[
pullIndex
].
y
;
debugArray
[
index
].
z
=
pullBack
[
pullIndex
].
z
;
debugArray
[
index
].
w
=
pullBack
[
pullIndex
].
w
;
}
}
#endif
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// end of j-loop
}
// end of j-loop
localParticle
.
force
[
0
]
*=
conversionFactor
;
localParticle
I
.
force
[
0
]
*=
conversionFactor
;
localParticle
.
force
[
1
]
*=
conversionFactor
;
localParticle
I
.
force
[
1
]
*=
conversionFactor
;
localParticle
.
force
[
2
]
*=
conversionFactor
;
localParticle
I
.
force
[
2
]
*=
conversionFactor
;
localParticle
.
torque
[
0
]
*=
-
conversionFactor
;
localParticle
I
.
torque
[
0
]
*=
-
conversionFactor
;
localParticle
.
torque
[
1
]
*=
-
conversionFactor
;
localParticle
I
.
torque
[
1
]
*=
-
conversionFactor
;
localParticle
.
torque
[
2
]
*=
-
conversionFactor
;
localParticle
I
.
torque
[
2
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
0
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
0
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
1
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
1
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
2
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
force
[
2
]
*=
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
0
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
0
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
1
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
1
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
2
]
*=
-
conversionFactor
;
sA
[
threadIdx
.
x
].
torque
[
2
]
*=
-
conversionFactor
;
// Write results
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
I
.
force
,
cSim
.
pForce4
);
add3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
add3dArray
(
3
*
offset
,
localParticle
I
.
torque
,
outputTorque
);
offset
=
(
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
offset
=
(
y
+
tgx
+
warp
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
...
@@ -430,18 +218,18 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -430,18 +218,18 @@ if( atomI == targetAtom || atomJ == targetAtom ){
#else
#else
unsigned
int
offset
=
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
unsigned
int
offset
=
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
localParticle
.
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
localParticle
I
.
force
,
cSim
.
pForce4
);
load3dArray
(
3
*
offset
,
localParticle
.
torque
,
outputTorque
);
load3dArray
(
3
*
offset
,
localParticle
I
.
torque
,
outputTorque
);
offset
=
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
offset
=
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
paddedNumberOfAtoms
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
add3dArrayToFloat4
(
offset
,
sA
[
threadIdx
.
x
].
force
,
cSim
.
pForce4
);
load3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
load3dArray
(
3
*
offset
,
sA
[
threadIdx
.
x
].
torque
,
outputTorque
);
#endif
#endif
lasty
=
y
;
}
// end of pInteractionFlag block
}
// end of pInteractionFlag block
lasty
=
y
;
}
}
pos
++
;
pos
++
;
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF1.h
0 → 100644
View file @
4a1e9683
static
__device__
void
SUB_METHOD_NAME
(
calculatePmeDirectElectrostaticPairIxnF1
,
_kernel
)(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float4
delta
,
float4
bn
,
float
bn5
,
float
forceFactor
,
#ifdef APPLY_SCALE
const
float
*
scalingFactors
,
#endif
float
force
[
3
],
float
*
energy
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
float
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
//float qi9 = atomI.labFrameQuadrupole[5];
float
qi9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]);
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
// float qk9 = atomJ.labFrameQuadrupole[5];
float
qk9
=
-
(
atomJ
.
labFrameQuadrupole
[
0
]
+
atomJ
.
labFrameQuadrupole
[
3
]);
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
#ifdef APPLY_SCALE
float
offset
=
1
.
0
f
-
scalingFactors
[
MScaleIndex
];
float
rr3
=
rr1
*
rr1
*
rr1
;
float
gf4
=
2
.
0
f
*
(
bn2
-
3
.
0
f
*
offset
*
rr3
*
rr1
*
rr1
);
#else
float
gf4
=
2
.
0
f
*
bn2
;
#endif
float
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
float
qkdi1
=
qk1
*
di1
+
qk2
*
di2
+
qk3
*
di3
;
float
ftm21
=
gf4
*
(
qkdi1
-
qidk1
);
float
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
float
qkdi2
=
qk2
*
di1
+
qk5
*
di2
+
qk6
*
di3
;
float
ftm22
=
gf4
*
(
qkdi2
-
qidk2
);
float
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
float
qkdi3
=
qk3
*
di1
+
qk6
*
di2
+
qk9
*
di3
;
float
ftm23
=
gf4
*
(
qkdi3
-
qidk3
);
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
#ifdef APPLY_SCALE
float
gf7
=
4
.
0
f
*
(
bn3
-
15
.
0
f
*
offset
*
rr3
*
rr3
*
rr1
);
#else
float
gf7
=
4
.
0
f
*
bn3
;
#endif
float
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
float
qkqir1
=
qk1
*
qir1
+
qk2
*
qir2
+
qk3
*
qir3
;
ftm21
+=
gf7
*
(
qiqkr1
+
qkqir1
);
float
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
float
qkqir2
=
qk2
*
qir1
+
qk5
*
qir2
+
qk6
*
qir3
;
ftm22
+=
gf7
*
(
qiqkr2
+
qkqir2
);
float
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
float
qkqir3
=
qk3
*
qir1
+
qk6
*
qir2
+
qk9
*
qir3
;
ftm23
+=
gf7
*
(
qiqkr3
+
qkqir3
);
// calculate the scalar products for permanent components
float
gl6
=
di1
*
dk1
+
di2
*
dk2
+
di3
*
dk3
;
float
gl7
=
2
.
0
f
*
(
qir1
*
dk1
+
qir2
*
dk2
+
qir3
*
dk3
-
(
qkr1
*
di1
+
qkr2
*
di2
+
qkr3
*
di3
)
);
float
gl5
=
-
4
.
0
f
*
(
qir1
*
qkr1
+
qir2
*
qkr2
+
qir3
*
qkr3
);
float
gl8
=
2
.
0
f
*
(
qi1
*
qk1
+
qi2
*
qk2
+
qi3
*
qk3
+
qi2
*
qk2
+
qi5
*
qk5
+
qi6
*
qk6
+
qi3
*
qk3
+
qi6
*
qk6
+
qi9
*
qk9
);
float
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
float
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
float
gl0
=
ci
*
ck
;
float
gl1
=
ck
*
sc3
-
ci
*
sc4
;
float
gl2
=
ci
*
sc6
+
ck
*
sc5
-
sc3
*
sc4
;
float
gl3
=
sc3
*
sc6
-
sc4
*
sc5
;
float
gl4
=
sc5
*
sc6
;
#ifdef APPLY_SCALE
//forceTorqueEnergy->w += forceFactor*(-offset*rr1*gl0 + (bn1-offset*rr3)*(gl1+gl6) + (bn2-offset*(3.0f*rr3*rr1*rr1))*(gl2+gl7+gl8) + (bn3-offset*(15.0f*rr3*rr3*rr1))*(gl3+gl5) + (bn4-offset*(105.0f*rr3*rr3*rr3))*gl4);
*
energy
+=
forceFactor
*
(
-
offset
*
rr1
*
gl0
+
(
bn1
-
offset
*
rr3
)
*
(
gl1
+
gl6
)
+
(
bn2
-
offset
*
(
3
.
0
f
*
rr3
*
rr1
*
rr1
))
*
(
gl2
+
gl7
+
gl8
)
+
(
bn3
-
offset
*
(
15
.
0
f
*
rr3
*
rr3
*
rr1
))
*
(
gl3
+
gl5
)
+
(
bn4
-
offset
*
(
105
.
0
f
*
rr3
*
rr3
*
rr3
))
*
gl4
);
#else
//forceTorqueEnergy->w += bn1*(gl1+gl6) + bn2*(gl2+gl7+gl8) + bn3*(gl3+gl5) + bn4*gl4;
*
energy
+=
forceFactor
*
(
bn1
*
(
gl1
+
gl6
)
+
bn2
*
(
gl2
+
gl7
+
gl8
)
+
bn3
*
(
gl3
+
gl5
)
+
bn4
*
gl4
);
#endif
float
gf1
=
bn1
*
gl0
+
bn2
*
(
gl1
+
gl6
)
+
bn3
*
(
gl2
+
gl7
+
gl8
)
+
bn4
*
(
gl3
+
gl5
)
+
bn5
*
gl4
;
#ifdef APPLY_SCALE
gf1
-=
offset
*
(
rr3
*
gl0
+
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
*
(
gl1
+
gl6
)
+
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
gl2
+
gl7
+
gl8
)
+
(
105
.
0
f
*
rr3
*
rr3
*
rr3
)
*
(
gl3
+
gl5
)
+
(
945
.
0
f
*
rr3
*
rr3
*
rr3
*
rr1
*
rr1
)
*
gl4
);
#endif
ftm21
+=
gf1
*
xr
;
ftm22
+=
gf1
*
yr
;
ftm23
+=
gf1
*
zr
;
#ifdef APPLY_SCALE
float
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
-
offset
*
(
-
ck
*
rr3
+
sc4
*
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
-
sc6
*
(
15
.
0
f
*
rr3
*
rr3
*
rr1
));
#else
float
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
#endif
ftm21
+=
gf2
*
di1
;
ftm22
+=
gf2
*
di2
;
ftm23
+=
gf2
*
di3
;
#ifdef APPLY_SCALE
float
gf5
=
2
.
0
f
*
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
-
offset
*
(
-
ck
*
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
+
sc4
*
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
-
sc6
*
(
105
.
0
f
*
rr3
*
rr3
*
rr3
)));
#else
float
gf5
=
2
.
0
f
*
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
);
#endif
ftm21
+=
gf5
*
qir1
;
ftm22
+=
gf5
*
qir2
;
ftm23
+=
gf5
*
qir3
;
#ifdef APPLY_SCALE
float
gf3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
-
offset
*
(
ci
*
rr3
+
sc3
*
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
+
sc5
*
(
15
.
0
f
*
rr3
*
rr3
*
rr1
));
#else
float
gf3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
#endif
ftm21
+=
gf3
*
dk1
;
ftm22
+=
gf3
*
dk2
;
ftm23
+=
gf3
*
dk3
;
#ifdef APPLY_SCALE
float
gf6
=
2
.
0
f
*
(
-
ci
*
bn2
-
sc3
*
bn3
-
sc5
*
bn4
-
offset
*
(
-
ci
*
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
-
sc3
*
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
-
sc5
*
(
105
.
0
f
*
rr3
*
rr3
*
rr3
)));
#else
float
gf6
=
2
.
0
f
*
(
-
ci
*
bn2
-
sc3
*
bn3
-
sc5
*
bn4
);
#endif
ftm21
+=
gf6
*
qkr1
;
ftm22
+=
gf6
*
qkr2
;
ftm23
+=
gf6
*
qkr3
;
force
[
0
]
=
ftm21
;
force
[
1
]
=
ftm22
;
force
[
2
]
=
ftm23
;
/*
if( forceFactor == 1.0f ){
atomJ.force[0] -= ftm21;
atomJ.force[1] -= ftm22;
atomJ.force[2] -= ftm23;
}
atomI.force[0] += ftm21;
atomI.force[1] += ftm22;
atomI.force[2] += ftm23;
*/
return
;
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticF2P.h
0 → 100644
View file @
4a1e9683
static
__device__
void
SUB_METHOD_NAME
(
calculatePmeDirectElectrostaticPairIxnF2
,
_kernel
)(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float4
delta
,
float4
bn
,
float
forceFactor
,
#ifdef APPLY_SCALE
const
float
*
scalingFactors
,
#endif
float
force
[
3
],
float
*
energy
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
float
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
// float qi9 = atomI.labFrameQuadrupole[5];
float
qi9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]);
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0
.
0
f
){
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
ratio
=
1
.
0
f
/
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
damp
=
damp
<
-
50
.
0
f
?
0
.
0
f
:
damp
;
}
float
scale5
=
(
damp
==
0
.
0
f
)
?
1
.
0
f
:
(
1
.
0
f
-
(
1
.
0
f
-
damp
)
*
exp
(
damp
));
float
rr5
=
rr1
*
rr1
;
rr5
=
3
.
0
f
*
rr1
*
rr5
*
rr5
;
#ifdef APPLY_SCALE
float
psc5
=
rr5
*
(
1
.
0
f
-
scale5
*
scalingFactors
[
PScaleIndex
]);
float
dsc5
=
rr5
*
(
1
.
0
f
-
scale5
*
scalingFactors
[
DScaleIndex
]);
float
usc5
=
rr5
*
(
1
.
0
f
-
scale5
*
scalingFactors
[
UScaleIndex
]);
#else
float
psc5
=
rr5
*
(
1
.
0
f
-
scale5
);
#endif
float
qiuk1
=
qi1
*
atomJ
.
inducedDipole
[
0
]
+
qi2
*
atomJ
.
inducedDipole
[
1
]
+
qi3
*
atomJ
.
inducedDipole
[
2
];
float
qiukp1
=
qi1
*
atomJ
.
inducedDipoleP
[
0
]
+
qi2
*
atomJ
.
inducedDipoleP
[
1
]
+
qi3
*
atomJ
.
inducedDipoleP
[
2
];
float
ftm21
=
-
bn2
*
(
qiuk1
+
qiukp1
);
#ifdef APPLY_SCALE
ftm21
+=
qiuk1
*
psc5
+
qiukp1
*
dsc5
;
#else
ftm21
+=
(
qiuk1
+
qiukp1
)
*
psc5
;
#endif
float
qiuk2
=
qi2
*
atomJ
.
inducedDipole
[
0
]
+
qi5
*
atomJ
.
inducedDipole
[
1
]
+
qi6
*
atomJ
.
inducedDipole
[
2
];
float
qiukp2
=
qi2
*
atomJ
.
inducedDipoleP
[
0
]
+
qi5
*
atomJ
.
inducedDipoleP
[
1
]
+
qi6
*
atomJ
.
inducedDipoleP
[
2
];
float
ftm22
=
-
bn2
*
(
qiuk2
+
qiukp2
);
#ifdef APPLY_SCALE
ftm22
+=
((
qiuk2
)
*
psc5
+
(
qiukp2
)
*
dsc5
);
#else
ftm22
+=
(
qiuk2
+
qiukp2
)
*
psc5
;
#endif
float
qiuk3
=
qi3
*
atomJ
.
inducedDipole
[
0
]
+
qi6
*
atomJ
.
inducedDipole
[
1
]
+
qi9
*
atomJ
.
inducedDipole
[
2
];
float
qiukp3
=
qi3
*
atomJ
.
inducedDipoleP
[
0
]
+
qi6
*
atomJ
.
inducedDipoleP
[
1
]
+
qi9
*
atomJ
.
inducedDipoleP
[
2
];
float
ftm23
=
-
bn2
*
(
qiuk3
+
qiukp3
);
#ifdef APPLY_SCALE
ftm23
+=
((
qiuk3
)
*
psc5
+
(
qiukp3
)
*
dsc5
);
#else
ftm23
+=
(
qiuk3
+
qiukp3
)
*
psc5
;
#endif
float
expdamp
=
exp
(
damp
);
float
scale3
=
(
damp
==
0
.
0
f
)
?
1
.
0
f
:
(
1
.
0
f
-
expdamp
);
float
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
float
psc3
=
rr3
*
(
1
.
0
f
-
scale3
*
scalingFactors
[
PScaleIndex
]);
float
dsc3
=
rr3
*
(
1
.
0
f
-
scale3
*
scalingFactors
[
DScaleIndex
]);
float
usc3
=
rr3
*
(
1
.
0
f
-
scale3
*
scalingFactors
[
UScaleIndex
]);
#else
float
psc3
=
rr3
*
(
1
.
0
f
-
scale3
);
#endif
float
scale7
=
(
damp
==
0
.
0
f
)
?
1
.
0
f
:
(
1
.
0
f
-
(
1
.
0
f
-
damp
+
0
.
6
f
*
damp
*
damp
)
*
expdamp
);
#ifdef APPLY_SCALE
float
psc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
*
scalingFactors
[
PScaleIndex
]);
float
dsc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
*
scalingFactors
[
DScaleIndex
]);
#else
float
psc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
);
#endif
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
sc3
=
di1
*
xr
+
di2
*
yr
+
di3
*
zr
;
float
sc5
=
qir1
*
xr
+
qir2
*
yr
+
qir3
*
zr
;
float
gfi3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
float
prefactor1
;
prefactor1
=
0
.
5
f
*
(
ci
*
psc3
+
sc3
*
psc5
+
sc5
*
psc7
-
gfi3
);
ftm21
-=
prefactor1
*
atomJ
.
inducedDipole
[
0
];
ftm22
-=
prefactor1
*
atomJ
.
inducedDipole
[
1
];
ftm23
-=
prefactor1
*
atomJ
.
inducedDipole
[
2
];
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
ci
*
dsc3
+
sc3
*
dsc5
+
sc5
*
dsc7
-
gfi3
);
#endif
ftm21
-=
prefactor1
*
atomJ
.
inducedDipoleP
[
0
];
ftm22
-=
prefactor1
*
atomJ
.
inducedDipoleP
[
1
];
ftm23
-=
prefactor1
*
atomJ
.
inducedDipoleP
[
2
];
float
sci4
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
//forceTorqueEnergy->w += 0.5f*((psc3-bn1)*(ci*sci4) + (psc5-bn2)*(sc3*sci4) + (psc7-bn3)*(sci4*sc5));
*
energy
+=
forceFactor
*
0
.
5
f
*
((
psc3
-
bn1
)
*
(
ci
*
sci4
)
+
(
psc5
-
bn2
)
*
(
sc3
*
sci4
)
+
(
psc7
-
bn3
)
*
(
sci4
*
sc5
));
float
scip4
=
atomJ
.
inducedDipoleP
[
0
]
*
xr
+
atomJ
.
inducedDipoleP
[
1
]
*
yr
+
atomJ
.
inducedDipoleP
[
2
]
*
zr
;
if
(
cAmoebaSim
.
polarizationType
==
0
){
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0
.
5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
0
]
+
scip4
*
atomI
.
inducedDipole
[
0
])
);
ftm22
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
1
]
+
scip4
*
atomI
.
inducedDipole
[
1
])
);
ftm23
+=
prefactor1
*
(
(
sci4
*
atomI
.
inducedDipoleP
[
2
]
+
scip4
*
atomI
.
inducedDipole
[
2
])
);
}
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
bn2
*
(
sci4
+
scip4
)
-
(
sci4
*
psc5
+
scip4
*
dsc5
)
);
#else
sci4
+=
scip4
;
prefactor1
=
0
.
5
f
*
sci4
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
di1
;
ftm22
+=
prefactor1
*
di2
;
ftm23
+=
prefactor1
*
di3
;
#ifdef APPLY_SCALE
float
gfi5
=
bn3
*
(
sci4
+
scip4
)
-
(
sci4
*
psc7
+
scip4
*
dsc7
);
#else
float
gfi5
=
sci4
*
(
bn3
-
psc7
);
#endif
ftm21
+=
gfi5
*
qir1
;
ftm22
+=
gfi5
*
qir2
;
ftm23
+=
gfi5
*
qir3
;
float
sci7
=
qir1
*
atomJ
.
inducedDipole
[
0
]
+
qir2
*
atomJ
.
inducedDipole
[
1
]
+
qir3
*
atomJ
.
inducedDipole
[
2
];
//forceTorqueEnergy->w += (bn2-psc5)*sci7;
*
energy
+=
forceFactor
*
(
bn2
-
psc5
)
*
sci7
;
float
scip7
=
qir1
*
atomJ
.
inducedDipoleP
[
0
]
+
qir2
*
atomJ
.
inducedDipoleP
[
1
]
+
qir3
*
atomJ
.
inducedDipoleP
[
2
];
#ifdef APPLY_SCALE
float
gli1
=
-
ci
*
sci4
;
float
gli2
=
-
sc3
*
sci4
+
2
.
0
f
*
sci7
;
float
gli3
=
-
sci4
*
sc5
;
float
glip1
=
-
ci
*
scip4
;
float
glip2
=
-
sc3
*
scip4
+
2
.
0
f
*
scip7
;
float
glip3
=
-
scip4
*
sc5
;
#else
float
gli1
=
-
ci
*
sci4
;
float
gli2
=
-
sc3
*
sci4
+
2
.
0
f
*
(
sci7
+
scip7
);
float
gli3
=
-
sci4
*
sc5
;
#endif
#ifdef APPLY_SCALE
float
gfi1
=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
)
+
bn4
*
(
gli3
+
glip3
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
.
0
f
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
.
0
f
*
(
gli2
*
psc5
+
glip2
*
dsc5
)
+
7
.
0
f
*
(
gli3
*
psc7
+
glip3
*
dsc7
)
);
#else
float
gfi1
=
bn2
*
gli1
+
bn3
*
gli2
+
bn4
*
gli3
;
gfi1
-=
(
rr1
*
rr1
)
*
(
3
.
0
f
*
gli1
*
psc3
+
5
.
0
f
*
gli2
*
psc5
+
7
.
0
f
*
gli3
*
psc7
);
#endif
gfi1
*=
0
.
5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
if
(
damp
!=
0
.
0
f
){
float
expdamp
=
exp
(
damp
);
float
temp3
=
-
1
.
5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
float
temp5
=
-
damp
;
float
temp7
=
-
0
.
2
f
-
0
.
6
f
*
damp
;
float
ddsc31
=
temp3
*
xr
;
float
ddsc32
=
temp3
*
yr
;
float
ddsc33
=
temp3
*
zr
;
float
ddsc51
=
temp5
*
ddsc31
;
float
ddsc52
=
temp5
*
ddsc32
;
float
ddsc53
=
temp5
*
ddsc33
;
float
ddsc71
=
temp7
*
ddsc51
;
float
ddsc72
=
temp7
*
ddsc52
;
float
ddsc73
=
temp7
*
ddsc53
;
float
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
(
gli1
*
scalingFactors
[
PScaleIndex
]
+
glip1
*
scalingFactors
[
DScaleIndex
]);
temp5
=
(
3
.
0
f
*
rr1
*
rr1
)
*
(
gli2
*
scalingFactors
[
PScaleIndex
]
+
glip2
*
scalingFactors
[
DScaleIndex
]);
temp7
=
(
15
.
0
f
*
rr3
*
rr1
)
*
(
gli3
*
scalingFactors
[
PScaleIndex
]
+
glip3
*
scalingFactors
[
DScaleIndex
]);
#else
temp3
=
gli1
;
temp5
=
(
3
.
0
f
*
rr1
*
rr1
)
*
gli2
;
temp7
=
(
15
.
0
f
*
rr3
*
rr1
)
*
gli3
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
);
}
//K
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
//float qk9 = atomJ.labFrameQuadrupole[5];
float
qk9
=
-
(
qk1
+
qk5
);
float
qkui1
=
qk1
*
atomI
.
inducedDipole
[
0
]
+
qk2
*
atomI
.
inducedDipole
[
1
]
+
qk3
*
atomI
.
inducedDipole
[
2
];
float
qkuip1
=
qk1
*
atomI
.
inducedDipoleP
[
0
]
+
qk2
*
atomI
.
inducedDipoleP
[
1
]
+
qk3
*
atomI
.
inducedDipoleP
[
2
];
ftm21
+=
bn2
*
(
qkui1
+
qkuip1
);
#ifdef APPLY_SCALE
ftm21
-=
(
qkui1
*
psc5
+
qkuip1
*
dsc5
);
#else
ftm21
-=
(
qkui1
+
qkuip1
)
*
psc5
;
#endif
float
qkui2
=
qk2
*
atomI
.
inducedDipole
[
0
]
+
qk5
*
atomI
.
inducedDipole
[
1
]
+
qk6
*
atomI
.
inducedDipole
[
2
];
float
qkuip2
=
qk2
*
atomI
.
inducedDipoleP
[
0
]
+
qk5
*
atomI
.
inducedDipoleP
[
1
]
+
qk6
*
atomI
.
inducedDipoleP
[
2
];
ftm22
+=
bn2
*
(
qkui2
+
qkuip2
);
#ifdef APPLY_SCALE
ftm22
-=
((
qkui2
)
*
psc5
+
(
qkuip2
)
*
dsc5
);
#else
ftm22
-=
(
qkui2
+
qkuip2
)
*
psc5
;
#endif
float
qkui3
=
qk3
*
atomI
.
inducedDipole
[
0
]
+
qk6
*
atomI
.
inducedDipole
[
1
]
+
qk9
*
atomI
.
inducedDipole
[
2
];
float
qkuip3
=
qk3
*
atomI
.
inducedDipoleP
[
0
]
+
qk6
*
atomI
.
inducedDipoleP
[
1
]
+
qk9
*
atomI
.
inducedDipoleP
[
2
];
ftm23
+=
bn2
*
(
qkui3
+
qkuip3
);
#ifdef APPLY_SCALE
ftm23
-=
((
qkui3
)
*
psc5
+
(
qkuip3
)
*
dsc5
);
#else
ftm23
-=
(
qkui3
+
qkuip3
)
*
psc5
;
#endif
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
float
ck
=
atomJ
.
q
;
float
gfi2
=
(
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
);
prefactor1
=
0
.
5
f
*
(
ck
*
psc3
-
sc4
*
psc5
+
sc6
*
psc7
+
gfi2
);
ftm21
+=
prefactor1
*
atomI
.
inducedDipole
[
0
];
ftm22
+=
prefactor1
*
atomI
.
inducedDipole
[
1
];
ftm23
+=
prefactor1
*
atomI
.
inducedDipole
[
2
];
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
ck
*
dsc3
-
sc4
*
dsc5
+
sc6
*
dsc7
+
gfi2
);
#endif
ftm21
+=
prefactor1
*
atomI
.
inducedDipoleP
[
0
];
ftm22
+=
prefactor1
*
atomI
.
inducedDipoleP
[
1
];
ftm23
+=
prefactor1
*
atomI
.
inducedDipoleP
[
2
];
float
sci3
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
//forceTorqueEnergy->w += 0.5f*( (ck*sci3)*(bn1-psc3) -(sci3*sc4)*(bn2-psc5) + sci3*sc6*(bn3-psc7) );
*
energy
+=
forceFactor
*
0
.
5
f
*
(
(
ck
*
sci3
)
*
(
bn1
-
psc3
)
-
(
sci3
*
sc4
)
*
(
bn2
-
psc5
)
+
sci3
*
sc6
*
(
bn3
-
psc7
)
);
float
scip3
=
atomI
.
inducedDipoleP
[
0
]
*
xr
+
atomI
.
inducedDipoleP
[
1
]
*
yr
+
atomI
.
inducedDipoleP
[
2
]
*
zr
;
if
(
cAmoebaSim
.
polarizationType
==
0
){
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0
.
5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
(
sci3
*
atomJ
.
inducedDipoleP
[
0
]
+
scip3
*
atomJ
.
inducedDipole
[
0
]
);
ftm22
+=
prefactor1
*
(
sci3
*
atomJ
.
inducedDipoleP
[
1
]
+
scip3
*
atomJ
.
inducedDipole
[
1
]
);
ftm23
+=
prefactor1
*
(
sci3
*
atomJ
.
inducedDipoleP
[
2
]
+
scip3
*
atomJ
.
inducedDipole
[
2
]
);
}
float
sci34
;
if
(
cAmoebaSim
.
polarizationType
==
0
){
float
sci4
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
scip4
=
atomJ
.
inducedDipoleP
[
0
]
*
xr
+
atomJ
.
inducedDipoleP
[
1
]
*
yr
+
atomJ
.
inducedDipoleP
[
2
]
*
zr
;
sci34
=
(
sci3
*
scip4
+
scip3
*
sci4
);
#ifdef APPLY_SCALE
gfi1
=
sci34
*
(
usc5
*
(
5
.
0
f
*
rr1
*
rr1
)
-
bn3
);
#else
gfi1
=
sci34
*
(
psc5
*
(
5
.
0
f
*
rr1
*
rr1
)
-
bn3
);
#endif
}
else
{
gfi1
=
0
.
0
f
;
}
#ifdef APPLY_SCALE
prefactor1
=
0
.
5
f
*
(
bn2
*
(
sci3
+
scip3
)
-
(
sci3
*
psc5
+
scip3
*
dsc5
)
);
#else
sci3
+=
scip3
;
prefactor1
=
0
.
5
f
*
sci3
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
dk1
;
ftm22
+=
prefactor1
*
dk2
;
ftm23
+=
prefactor1
*
dk3
;
#ifdef APPLY_SCALE
float
gfi6
=
-
bn3
*
(
sci3
+
scip3
)
+
(
sci3
*
psc7
+
scip3
*
dsc7
);
#else
float
gfi6
=
sci3
*
(
psc7
-
bn3
);
#endif
ftm21
+=
gfi6
*
qkr1
;
ftm22
+=
gfi6
*
qkr2
;
ftm23
+=
gfi6
*
qkr3
;
float
sci1
=
atomI
.
inducedDipole
[
0
]
*
dk1
+
atomI
.
inducedDipole
[
1
]
*
dk2
+
atomI
.
inducedDipole
[
2
]
*
dk3
+
di1
*
atomJ
.
inducedDipole
[
0
]
+
di2
*
atomJ
.
inducedDipole
[
1
]
+
di3
*
atomJ
.
inducedDipole
[
2
];
//forceTorqueEnergy->w += 0.5f*( sci1*(bn1-psc3) );
*
energy
+=
forceFactor
*
0
.
5
f
*
(
sci1
*
(
bn1
-
psc3
)
);
float
sci8
=
qkr1
*
atomI
.
inducedDipole
[
0
]
+
qkr2
*
atomI
.
inducedDipole
[
1
]
+
qkr3
*
atomI
.
inducedDipole
[
2
];
//forceTorqueEnergy->w += sci8*(bn2-psc5);
*
energy
+=
forceFactor
*
sci8
*
(
bn2
-
psc5
);
float
scip1
=
atomI
.
inducedDipoleP
[
0
]
*
dk1
+
atomI
.
inducedDipoleP
[
1
]
*
dk2
+
atomI
.
inducedDipoleP
[
2
]
*
dk3
+
di1
*
atomJ
.
inducedDipoleP
[
0
]
+
di2
*
atomJ
.
inducedDipoleP
[
1
]
+
di3
*
atomJ
.
inducedDipoleP
[
2
];
#ifndef APPLY_SCALE
sci1
+=
scip1
;
#endif
float
scip2
=
atomI
.
inducedDipole
[
0
]
*
atomJ
.
inducedDipoleP
[
0
]
+
atomI
.
inducedDipole
[
1
]
*
atomJ
.
inducedDipoleP
[
1
]
+
atomI
.
inducedDipole
[
2
]
*
atomJ
.
inducedDipoleP
[
2
]
+
atomJ
.
inducedDipole
[
0
]
*
atomI
.
inducedDipoleP
[
0
]
+
atomJ
.
inducedDipole
[
1
]
*
atomI
.
inducedDipoleP
[
1
]
+
atomJ
.
inducedDipole
[
2
]
*
atomI
.
inducedDipoleP
[
2
];
float
scip8
=
qkr1
*
atomI
.
inducedDipoleP
[
0
]
+
qkr2
*
atomI
.
inducedDipoleP
[
1
]
+
qkr3
*
atomI
.
inducedDipoleP
[
2
];
#ifndef APPLY_SCALE
sci8
+=
scip8
;
#endif
gli1
=
ck
*
sci3
+
sci1
;
gli2
=
-
(
sci3
*
sc4
+
2
.
0
f
*
sci8
);
gli3
=
sci3
*
sc6
;
#ifdef APPLY_SCALE
glip1
=
ck
*
scip3
+
scip1
;
glip2
=
-
(
scip3
*
sc4
+
2
.
0
f
*
scip8
);
glip3
=
scip3
*
sc6
;
#endif
#ifdef APPLY_SCALE
gfi1
+=
(
bn2
*
(
gli1
+
glip1
)
+
bn3
*
(
gli2
+
glip2
)
+
bn4
*
(
gli3
+
glip3
));
gfi1
-=
(
rr1
*
rr1
)
*
(
3
.
0
f
*
(
gli1
*
psc3
+
glip1
*
dsc3
)
+
5
.
0
f
*
(
gli2
*
psc5
+
glip2
*
dsc5
)
+
7
.
0
f
*
(
gli3
*
psc7
+
glip3
*
dsc7
)
);
#else
gfi1
+=
(
bn2
*
gli1
+
bn3
*
gli2
+
bn4
*
gli3
);
gfi1
-=
(
rr1
*
rr1
)
*
(
3
.
0
f
*
gli1
*
psc3
+
5
.
0
f
*
gli2
*
psc5
+
7
.
0
f
*
gli3
*
psc7
);
#endif
if
(
cAmoebaSim
.
polarizationType
==
0
){
#ifdef APPLY_SCALE
gfi1
+=
scip2
*
(
bn2
-
(
3
.
0
f
*
rr1
*
rr1
)
*
usc3
);
#else
gfi1
+=
scip2
*
(
bn2
-
(
3
.
0
f
*
rr1
*
rr1
)
*
psc3
);
#endif
}
gfi1
*=
0
.
5
f
;
ftm21
+=
gfi1
*
xr
;
ftm22
+=
gfi1
*
yr
;
ftm23
+=
gfi1
*
zr
;
if
(
damp
!=
0
.
0
f
){
float
expdamp
=
exp
(
damp
);
float
temp3
=
-
1
.
5
f
*
damp
*
expdamp
*
rr1
*
rr1
;
float
temp5
=
-
damp
;
float
temp7
=
-
0
.
2
f
-
0
.
6
f
*
damp
;
float
ddsc31
=
temp3
*
xr
;
float
ddsc32
=
temp3
*
yr
;
float
ddsc33
=
temp3
*
zr
;
float
ddsc51
=
temp5
*
ddsc31
;
float
ddsc52
=
temp5
*
ddsc32
;
float
ddsc53
=
temp5
*
ddsc33
;
float
ddsc71
=
temp7
*
ddsc51
;
float
ddsc72
=
temp7
*
ddsc52
;
float
ddsc73
=
temp7
*
ddsc53
;
float
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
temp3
=
gli1
*
scalingFactors
[
PScaleIndex
]
+
glip1
*
scalingFactors
[
DScaleIndex
];
temp5
=
(
3
.
0
f
*
rr1
*
rr1
)
*
(
gli2
*
scalingFactors
[
PScaleIndex
]
+
glip2
*
scalingFactors
[
DScaleIndex
]);
temp7
=
(
15
.
0
f
*
rr3
*
rr1
)
*
(
gli3
*
scalingFactors
[
PScaleIndex
]
+
glip3
*
scalingFactors
[
DScaleIndex
]);
#else
temp3
=
gli1
;
temp5
=
(
3
.
0
f
*
rr1
*
rr1
)
*
gli2
;
temp7
=
(
15
.
0
f
*
rr3
*
rr1
)
*
(
gli3
);
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
+
temp7
*
ddsc71
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
+
temp7
*
ddsc72
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
+
temp7
*
ddsc73
);
if
(
cAmoebaSim
.
polarizationType
==
0
){
#ifdef APPLY_SCALE
temp3
=
scalingFactors
[
UScaleIndex
]
*
scip2
;
temp5
=
-
(
3
.
0
f
*
rr1
*
rr1
)
*
scalingFactors
[
UScaleIndex
]
*
sci34
;
#else
temp3
=
scip2
;
temp5
=
-
(
3
.
0
f
*
rr1
*
rr1
)
*
sci34
;
#endif
ftm21
-=
rr3
*
(
temp3
*
ddsc31
+
temp5
*
ddsc51
);
ftm22
-=
rr3
*
(
temp3
*
ddsc32
+
temp5
*
ddsc52
);
ftm23
-=
rr3
*
(
temp3
*
ddsc33
+
temp5
*
ddsc53
);
}
}
force
[
0
]
+=
ftm21
;
force
[
1
]
+=
ftm22
;
force
[
2
]
+=
ftm23
;
/*
if( forceFactor == 1.0f ){
atomJ.force[0] -= ftm21;
atomJ.force[1] -= ftm22;
atomJ.force[2] -= ftm23;
}
atomI.force[0] += ftm21;
atomI.force[1] += ftm22;
atomI.force[2] += ftm23;
*/
/*
forceTorqueEnergy->x += ftm21;
forceTorqueEnergy->y += ftm22;
forceTorqueEnergy->z += ftm23;
*/
return
;
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT1.h
0 → 100644
View file @
4a1e9683
static
__device__
void
SUB_METHOD_NAME
(
calculatePmeDirectElectrostaticPairIxnT1
,
_kernel
)(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
const
float4
delta
,
const
float4
bn
#ifdef APPLY_SCALE
,
const
float
*
scalingFactors
#endif
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
#ifdef APPLY_SCALE
float
rr1
=
delta
.
w
;
#endif
// set the permanent multipole and induced dipole values;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
//float qi9 = atomI.labFrameQuadrupole[5];
float
qi9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]);
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
float
qk5
=
atomJ
.
labFrameQuadrupole
[
3
];
float
qk6
=
atomJ
.
labFrameQuadrupole
[
4
];
//float qk9 = atomJ.labFrameQuadrupole[5];
float
qk9
=
-
(
atomJ
.
labFrameQuadrupole
[
0
]
+
atomJ
.
labFrameQuadrupole
[
3
]);
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
float
bn4
=
bn
.
w
;
// apply Thole polarization damping to scale factors
#ifdef APPLY_SCALE
float
rr2
=
rr1
*
rr1
;
float
rr3
=
rr1
*
rr2
;
float
rr5
=
3
.
0
f
*
rr3
*
rr2
;
float
rr7
=
5
.
0
f
*
rr5
*
rr2
;
float
rr9
=
7
.
0
f
*
rr7
*
rr2
;
float
scale
=
1
.
0
f
-
scalingFactors
[
MScaleIndex
];
float
prefactor
=
scale
*
rr3
-
bn1
;
#else
float
prefactor
=
-
bn1
;
#endif
float
dixdk1
=
di2
*
dk3
-
di3
*
dk2
;
float
ttm21
=
prefactor
*
dixdk1
;
float
dixdk2
=
di3
*
dk1
-
di1
*
dk3
;
float
ttm22
=
prefactor
*
dixdk2
;
float
dixdk3
=
di1
*
dk2
-
di2
*
dk1
;
float
ttm23
=
prefactor
*
dixdk3
;
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
float
qkr1
=
qk1
*
xr
+
qk2
*
yr
+
qk3
*
zr
;
float
qkr2
=
qk2
*
xr
+
qk5
*
yr
+
qk6
*
zr
;
float
qkr3
=
qk3
*
xr
+
qk6
*
yr
+
qk9
*
zr
;
float
qiqkr1
=
qi1
*
qkr1
+
qi2
*
qkr2
+
qi3
*
qkr3
;
float
qiqkr2
=
qi2
*
qkr1
+
qi5
*
qkr2
+
qi6
*
qkr3
;
float
qiqkr3
=
qi3
*
qkr1
+
qi6
*
qkr2
+
qi9
*
qkr3
;
float
rxqikr1
=
yr
*
qiqkr3
-
zr
*
qiqkr2
;
float
qkrxqir1
=
qkr2
*
qir3
-
qkr3
*
qir2
;
#ifdef APPLY_SCALE
prefactor
=
4
.
0
f
*
(
bn3
-
scale
*
rr7
);
#else
prefactor
=
4
.
0
f
*
bn3
;
#endif
ttm21
-=
prefactor
*
(
rxqikr1
+
qkrxqir1
);
float
rxqikr2
=
zr
*
qiqkr1
-
xr
*
qiqkr3
;
float
qkrxqir2
=
qkr3
*
qir1
-
qkr1
*
qir3
;
ttm22
-=
prefactor
*
(
rxqikr2
+
qkrxqir2
);
float
rxqikr3
=
xr
*
qiqkr2
-
yr
*
qiqkr1
;
float
qkrxqir3
=
qkr1
*
qir2
-
qkr2
*
qir1
;
ttm23
-=
prefactor
*
(
rxqikr3
+
qkrxqir3
);
float
qidk1
=
qi1
*
dk1
+
qi2
*
dk2
+
qi3
*
dk3
;
float
qidk2
=
qi2
*
dk1
+
qi5
*
dk2
+
qi6
*
dk3
;
float
qidk3
=
qi3
*
dk1
+
qi6
*
dk2
+
qi9
*
dk3
;
float
dixqkr1
=
di2
*
qkr3
-
di3
*
qkr2
;
float
dkxqir1
=
dk2
*
qir3
-
dk3
*
qir2
;
float
rxqidk1
=
yr
*
qidk3
-
zr
*
qidk2
;
float
qixqk1
=
qi2
*
qk3
+
qi5
*
qk6
+
qi6
*
qk9
-
qi3
*
qk2
-
qi6
*
qk5
-
qi9
*
qk6
;
#ifdef APPLY_SCALE
prefactor
=
2
.
0
f
*
(
bn2
-
scale
*
rr5
);
#else
prefactor
=
2
.
0
f
*
bn2
;
#endif
ttm21
+=
prefactor
*
(
dixqkr1
+
dkxqir1
+
rxqidk1
-
2
.
0
f
*
qixqk1
);
float
dixqkr2
=
di3
*
qkr1
-
di1
*
qkr3
;
float
dkxqir2
=
dk3
*
qir1
-
dk1
*
qir3
;
float
rxqidk2
=
zr
*
qidk1
-
xr
*
qidk3
;
float
qixqk2
=
qi3
*
qk1
+
qi6
*
qk2
+
qi9
*
qk3
-
qi1
*
qk3
-
qi2
*
qk6
-
qi3
*
qk9
;
ttm22
+=
prefactor
*
(
dixqkr2
+
dkxqir2
+
rxqidk2
-
2
.
0
f
*
qixqk2
);
float
dixqkr3
=
di1
*
qkr2
-
di2
*
qkr1
;
float
dkxqir3
=
dk1
*
qir2
-
dk2
*
qir1
;
float
rxqidk3
=
xr
*
qidk2
-
yr
*
qidk1
;
float
qixqk3
=
qi1
*
qk2
+
qi2
*
qk5
+
qi3
*
qk6
-
qi2
*
qk1
-
qi5
*
qk2
-
qi6
*
qk3
;
ttm23
+=
prefactor
*
(
dixqkr3
+
dkxqir3
+
rxqidk3
-
2
.
0
f
*
qixqk3
);
float
sc4
=
dk1
*
xr
+
dk2
*
yr
+
dk3
*
zr
;
float
sc6
=
qkr1
*
xr
+
qkr2
*
yr
+
qkr3
*
zr
;
float
gf2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
#ifdef APPLY_SCALE
float
gfr2
=
-
ck
*
rr3
+
sc4
*
rr5
-
sc6
*
rr7
;
prefactor
=
(
gf2
-
scale
*
gfr2
);
#else
prefactor
=
gf2
;
#endif
ttm21
+=
prefactor
*
(
di2
*
zr
-
di3
*
yr
);
ttm22
+=
prefactor
*
(
di3
*
xr
-
di1
*
zr
);
ttm23
+=
prefactor
*
(
di1
*
yr
-
di2
*
xr
);
float
gf5
=
(
-
ck
*
bn2
+
sc4
*
bn3
-
sc6
*
bn4
);
#ifdef APPLY_SCALE
float
gfr5
=
(
-
ck
*
rr5
+
sc4
*
rr7
-
sc6
*
rr9
);
prefactor
=
2
.
0
f
*
(
gf5
-
scale
*
gfr5
);
#else
prefactor
=
2
.
0
f
*
gf5
;
#endif
float
rxqir1
=
yr
*
qir3
-
zr
*
qir2
;
float
rxqir2
=
zr
*
qir1
-
xr
*
qir3
;
float
rxqir3
=
xr
*
qir2
-
yr
*
qir1
;
ttm21
-=
prefactor
*
rxqir1
;
ttm22
-=
prefactor
*
rxqir2
;
ttm23
-=
prefactor
*
rxqir3
;
atomI
.
torque
[
0
]
+=
ttm21
;
atomI
.
torque
[
1
]
+=
ttm22
;
atomI
.
torque
[
2
]
+=
ttm23
;
/*
torque[0] = ttm21;
torque[1] = ttm22;
torque[2] = ttm23;
*/
return
;
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostaticT2.h
0 → 100644
View file @
4a1e9683
static
__device__
void
SUB_METHOD_NAME
(
calculatePmeDirectElectrostaticPairIxnT2
,
_kernel
)(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
const
float4
delta
,
const
float4
bn
#ifdef APPLY_SCALE
,
const
float
*
scalingFactors
#endif
){
float
xr
=
delta
.
x
;
float
yr
=
delta
.
y
;
float
zr
=
delta
.
z
;
float
rr1
=
delta
.
w
;
// set the permanent multipole and induced dipole values;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
4
];
//float qi9 = atomI.labFrameQuadrupole[5];
float
qi9
=
-
(
atomI
.
labFrameQuadrupole
[
0
]
+
atomI
.
labFrameQuadrupole
[
3
]);
float
bn1
=
bn
.
x
;
float
bn2
=
bn
.
y
;
float
bn3
=
bn
.
z
;
// apply Thole polarization damping to scale factors
float
scale3
=
1
.
0
f
;
float
scale5
=
1
.
0
f
;
float
scale7
=
1
.
0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0
.
0
f
){
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
ratio
=
1
.
0
f
/
(
rr1
*
damp
);
damp
=
-
pgamma
*
ratio
*
ratio
*
ratio
;
if
(
damp
>
-
50
.
0
f
){
float
expdamp
=
exp
(
damp
);
scale3
=
1
.
0
f
-
expdamp
;
scale5
=
1
.
0
f
-
(
1
.
0
f
-
damp
)
*
expdamp
;
scale7
=
1
.
0
f
-
(
1
.
0
f
-
damp
+
0
.
6
f
*
damp
*
damp
)
*
expdamp
;
}
}
float
rr3
=
rr1
*
rr1
*
rr1
;
#ifdef APPLY_SCALE
float
dsc3
=
rr3
*
(
1
.
0
f
-
scale3
*
scalingFactors
[
DScaleIndex
]);
float
dsc5
=
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
*
(
1
.
0
f
-
scale5
*
scalingFactors
[
DScaleIndex
]);
float
dsc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
*
scalingFactors
[
DScaleIndex
]);
float
psc3
=
rr3
*
(
1
.
0
f
-
scale3
*
scalingFactors
[
PScaleIndex
]);
float
psc5
=
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
*
(
1
.
0
f
-
scale5
*
scalingFactors
[
PScaleIndex
]);
float
psc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
*
scalingFactors
[
PScaleIndex
]);
#else
float
psc3
=
rr3
*
(
1
.
0
f
-
scale3
);
float
psc5
=
(
3
.
0
f
*
rr3
*
rr1
*
rr1
)
*
(
1
.
0
f
-
scale5
);
float
psc7
=
(
15
.
0
f
*
rr3
*
rr3
*
rr1
)
*
(
1
.
0
f
-
scale7
);
#endif
float
prefactor1
=
0
.
5
f
*
(
psc3
-
bn1
);
#ifdef APPLY_SCALE
float
prefactor2
=
0
.
5
f
*
(
dsc3
-
bn1
);
#endif
float
dixuk1
=
di2
*
atomJ
.
inducedDipole
[
2
]
-
di3
*
atomJ
.
inducedDipole
[
1
];
float
dixukp1
=
di2
*
atomJ
.
inducedDipoleP
[
2
]
-
di3
*
atomJ
.
inducedDipoleP
[
1
];
#ifdef APPLY_SCALE
float
ttm2i1
=
prefactor1
*
dixuk1
+
prefactor2
*
dixukp1
;
#else
float
ttm2i1
=
prefactor1
*
(
dixuk1
+
dixukp1
);
#endif
float
dixuk2
=
di3
*
atomJ
.
inducedDipole
[
0
]
-
di1
*
atomJ
.
inducedDipole
[
2
];
float
dixukp2
=
di3
*
atomJ
.
inducedDipoleP
[
0
]
-
di1
*
atomJ
.
inducedDipoleP
[
2
];
#ifdef APPLY_SCALE
float
ttm2i2
=
prefactor1
*
dixuk2
+
prefactor2
*
dixukp2
;
#else
float
ttm2i2
=
prefactor1
*
(
dixuk2
+
dixukp2
);
#endif
float
dixuk3
=
di1
*
atomJ
.
inducedDipole
[
1
]
-
di2
*
atomJ
.
inducedDipole
[
0
];
float
dixukp3
=
di1
*
atomJ
.
inducedDipoleP
[
1
]
-
di2
*
atomJ
.
inducedDipoleP
[
0
];
#ifdef APPLY_SCALE
float
ttm2i3
=
prefactor1
*
dixuk3
+
prefactor2
*
dixukp3
;
#else
float
ttm2i3
=
prefactor1
*
(
dixuk3
+
dixukp3
);
#endif
float
sci4
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
scip4
=
atomJ
.
inducedDipoleP
[
0
]
*
xr
+
atomJ
.
inducedDipoleP
[
1
]
*
yr
+
atomJ
.
inducedDipoleP
[
2
]
*
zr
;
float
gti2
=
bn2
*
(
sci4
+
scip4
);
#ifdef APPLY_SCALE
float
gtri2
=
(
sci4
*
psc5
+
scip4
*
dsc5
);
#else
float
gtri2
=
psc5
*
(
sci4
+
scip4
);
#endif
prefactor1
=
0
.
5
f
*
(
gti2
-
gtri2
);
ttm2i1
+=
prefactor1
*
(
di2
*
zr
-
di3
*
yr
);
ttm2i2
+=
prefactor1
*
(
di3
*
xr
-
di1
*
zr
);
ttm2i3
+=
prefactor1
*
(
di1
*
yr
-
di2
*
xr
);
float
qir1
=
qi1
*
xr
+
qi2
*
yr
+
qi3
*
zr
;
float
qir2
=
qi2
*
xr
+
qi5
*
yr
+
qi6
*
zr
;
float
qir3
=
qi3
*
xr
+
qi6
*
yr
+
qi9
*
zr
;
#ifdef APPLY_SCALE
prefactor1
=
sci4
*
psc7
+
scip4
*
dsc7
-
bn3
*
(
sci4
+
scip4
);
#else
prefactor1
=
psc7
*
(
sci4
+
scip4
)
-
bn3
*
(
sci4
+
scip4
);
#endif
ttm2i1
+=
prefactor1
*
(
yr
*
qir3
-
zr
*
qir2
);
ttm2i2
+=
prefactor1
*
(
zr
*
qir1
-
xr
*
qir3
);
ttm2i3
+=
prefactor1
*
(
xr
*
qir2
-
yr
*
qir1
);
float
qiuk1
=
qi1
*
atomJ
.
inducedDipole
[
0
]
+
qi2
*
atomJ
.
inducedDipole
[
1
]
+
qi3
*
atomJ
.
inducedDipole
[
2
];
float
qiuk2
=
qi2
*
atomJ
.
inducedDipole
[
0
]
+
qi5
*
atomJ
.
inducedDipole
[
1
]
+
qi6
*
atomJ
.
inducedDipole
[
2
];
float
qiuk3
=
qi3
*
atomJ
.
inducedDipole
[
0
]
+
qi6
*
atomJ
.
inducedDipole
[
1
]
+
qi9
*
atomJ
.
inducedDipole
[
2
];
float
qiukp1
=
qi1
*
atomJ
.
inducedDipoleP
[
0
]
+
qi2
*
atomJ
.
inducedDipoleP
[
1
]
+
qi3
*
atomJ
.
inducedDipoleP
[
2
];
float
qiukp2
=
qi2
*
atomJ
.
inducedDipoleP
[
0
]
+
qi5
*
atomJ
.
inducedDipoleP
[
1
]
+
qi6
*
atomJ
.
inducedDipoleP
[
2
];
float
qiukp3
=
qi3
*
atomJ
.
inducedDipoleP
[
0
]
+
qi6
*
atomJ
.
inducedDipoleP
[
1
]
+
qi9
*
atomJ
.
inducedDipoleP
[
2
];
prefactor1
=
(
bn2
-
psc5
);
#ifdef APPLY_SCALE
prefactor2
=
(
bn2
-
dsc5
);
#endif
float
ukxqir1
=
atomJ
.
inducedDipole
[
1
]
*
qir3
-
atomJ
.
inducedDipole
[
2
]
*
qir2
;
float
ukxqirp1
=
atomJ
.
inducedDipoleP
[
1
]
*
qir3
-
atomJ
.
inducedDipoleP
[
2
]
*
qir2
;
float
rxqiuk1
=
yr
*
qiuk3
-
zr
*
qiuk2
;
float
rxqiukp1
=
yr
*
qiukp3
-
zr
*
qiukp2
;
#ifdef APPLY_SCALE
ttm2i1
+=
prefactor1
*
(
ukxqir1
+
rxqiuk1
)
+
prefactor2
*
(
ukxqirp1
+
rxqiukp1
);
#else
ttm2i1
+=
prefactor1
*
(
ukxqir1
+
rxqiuk1
+
ukxqirp1
+
rxqiukp1
);
#endif
float
ukxqir2
=
atomJ
.
inducedDipole
[
2
]
*
qir1
-
atomJ
.
inducedDipole
[
0
]
*
qir3
;
float
ukxqirp2
=
atomJ
.
inducedDipoleP
[
2
]
*
qir1
-
atomJ
.
inducedDipoleP
[
0
]
*
qir3
;
float
rxqiuk2
=
zr
*
qiuk1
-
xr
*
qiuk3
;
float
rxqiukp2
=
zr
*
qiukp1
-
xr
*
qiukp3
;
#ifdef APPLY_SCALE
ttm2i2
+=
prefactor1
*
(
ukxqir2
+
rxqiuk2
)
+
prefactor2
*
(
ukxqirp2
+
rxqiukp2
);
#else
ttm2i2
+=
prefactor1
*
(
ukxqir2
+
rxqiuk2
+
ukxqirp2
+
rxqiukp2
);
#endif
float
ukxqir3
=
atomJ
.
inducedDipole
[
0
]
*
qir2
-
atomJ
.
inducedDipole
[
1
]
*
qir1
;
float
ukxqirp3
=
atomJ
.
inducedDipoleP
[
0
]
*
qir2
-
atomJ
.
inducedDipoleP
[
1
]
*
qir1
;
float
rxqiuk3
=
xr
*
qiuk2
-
yr
*
qiuk1
;
float
rxqiukp3
=
xr
*
qiukp2
-
yr
*
qiukp1
;
#ifdef APPLY_SCALE
ttm2i3
+=
prefactor1
*
(
ukxqir3
+
rxqiuk3
)
+
prefactor2
*
(
ukxqirp3
+
rxqiukp3
);
#else
ttm2i3
+=
prefactor1
*
(
ukxqir3
+
rxqiuk3
+
ukxqirp3
+
rxqiukp3
);
#endif
atomI
.
torque
[
0
]
+=
ttm2i1
;
atomI
.
torque
[
1
]
+=
ttm2i2
;
atomI
.
torque
[
2
]
+=
ttm2i3
;
/*
torque[0] += ttm2i1;
torque[1] += ttm2i2;
torque[2] += ttm2i3;
*/
return
;
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
4a1e9683
...
@@ -378,13 +378,16 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
...
@@ -378,13 +378,16 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
#endif
static
void
kSorUpdateMutualInducedField_kernel
(
static
void
kSorUpdateMutualInducedField_kernel
(
int
numberOfEntries
,
float
*
polarizability
,
float
*
polarizability
,
float
*
inducedDipole
,
float
*
inducedDipoleP
,
float
*
inducedDipole
,
float
*
inducedDipoleP
,
float
*
fixedEField
,
float
*
fixedEFieldP
,
float
*
fixedEField
,
float
*
fixedEFieldP
,
float
*
matrixProduct
,
float
*
matrixProductP
)
float
*
matrixProduct
,
float
*
matrixProductP
)
{
{
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
const
float
polarSOR
=
0.70
f
;
while
(
pos
<
3
*
cSim
.
atoms
)
while
(
pos
<
3
*
cSim
.
atoms
)
{
{
...
@@ -393,14 +396,12 @@ static void kSorUpdateMutualInducedField_kernel(
...
@@ -393,14 +396,12 @@ static void kSorUpdateMutualInducedField_kernel(
// add self terms to fields
// add self terms to fields
const
float
term
=
(
4.0
f
/
3.0
f
)
*
(
cSim
.
alphaEwald
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
)
/
cAmoebaSim
.
sqrtPi
;
matrixProduct
[
pos
]
+=
term
*
previousDipole
;
matrixProduct
[
pos
]
+=
term
*
previousDipole
;
matrixProductP
[
pos
]
+=
term
*
previousDipoleP
;
matrixProductP
[
pos
]
+=
term
*
previousDipoleP
;
inducedDipole
[
pos
]
=
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
inducedDipole
[
pos
]
=
fixedEField
[
pos
]
+
polarizability
[
pos
]
*
matrixProduct
[
pos
];
inducedDipoleP
[
pos
]
=
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
inducedDipoleP
[
pos
]
=
fixedEFieldP
[
pos
]
+
polarizability
[
pos
]
*
matrixProductP
[
pos
];
const
float
polarSOR
=
0.70
f
;
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipole
[
pos
]
=
previousDipole
+
polarSOR
*
(
inducedDipole
[
pos
]
-
previousDipole
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
inducedDipoleP
[
pos
]
=
previousDipoleP
+
polarSOR
*
(
inducedDipoleP
[
pos
]
-
previousDipoleP
);
...
@@ -445,9 +446,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -445,9 +446,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
int
iteration
=
1
;
int
targetAtom
=
546
;
int
targetAtom
=
546
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
const
char
*
methodName
=
"cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply"
;
static
int
iteration
=
1
;
if
(
1
&&
amoebaGpu
->
log
){
if
(
1
&&
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s
\n
"
,
methodName
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
...
@@ -485,17 +486,30 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -485,17 +486,30 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#endif
#endif
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
else
{
}
else
{
kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
threadsPerBlock
,
sizeof
(
MutualInducedParticle
)
*
threadsPerBlock
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractingWorkUnit
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
,
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
,
debugArray
->
_pDevData
,
targetAtom
);
#else
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
amoebaGpu
->
psWorkArray_3_2
->
_pDevData
);
#endif
}
}
LAUNCHERROR
(
"kCalculateAmoebaPmeMutualInducedField"
);
LAUNCHERROR
(
"kCalculateAmoebaPmeMutualInducedField"
);
...
@@ -507,8 +521,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -507,8 +521,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
iteration
);
(
void
)
fflush
(
amoebaGpu
->
log
);
outputArray
->
Download
();
outputArray
->
Download
();
outputPolarArray
->
Download
();
outputPolarArray
->
Download
();
//
debugArray->Download();
//
debugArray->Download();
int
maxPrint
=
5
000000
;
int
maxPrint
=
5
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
...
@@ -551,7 +565,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
...
@@ -551,7 +565,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration
++
;
iteration
++
;
}
}
//
delete debugArray;
delete
debugArray
;
#endif
#endif
}
}
...
@@ -592,7 +606,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -592,7 +606,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// set E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// set E_Field & E_FieldPolar] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
// initialize [ InducedDipole & InducedDipolePolar ] to [ E_Field & E_FieldPolar]*Polarizability
kInitializeMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_
blocks
,
gpu
->
sim
.
bsf_reduce_
threads_per_block
>>>
(
kInitializeMutualInducedField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
(
gpu
->
natoms
,
gpu
->
natoms
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
...
@@ -637,41 +651,47 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -637,41 +651,47 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply
(
amoebaGpu
,
amoebaGpu
->
psWorkVector
[
0
],
amoebaGpu
->
psWorkVector
[
1
]
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleField
(
amoebaGpu
);
// post matrix multiply
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
/*
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psPolarizability, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
*/
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
0
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
0
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
1
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkVector
[
1
],
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI
Pre
"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaP
rePostP
meDirectMI"
,
fileId
,
outputVector
);
}
}
#endif
// post matrix multiply
kSorUpdateMutualInducedField_kernel
<<<
gpu
->
sim
.
nonbond_
blocks
,
gpu
->
sim
.
bsf_reduce_
threads_per_block
>>>
(
kSorUpdateMutualInducedField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
(
gpu
->
natoms
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psPolarizability
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psInducedDipole
->
_pDevData
,
amoebaGpu
->
psInducedDipolePolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psE_Field
->
_pDevData
,
amoebaGpu
->
psE_FieldPolar
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
LAUNCHERROR
(
"kSorUpdatePmeMutualInducedField"
);
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
fileId
.
push_back
(
iteration
);
fileId
.
push_back
(
iteration
);
VectorOfDoubleVectors
outputVector
;
VectorOfDoubleVectors
outputVector
;
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_Field
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psE_FieldPolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
//
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psPolarizability
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psPolarizability
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaLoadCudaFloatArray
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
outputVector
,
gpu
->
psAtomIndex
->
_pSysData
,
1.0
f
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectMI"
,
fileId
,
outputVector
);
//exit(0);
}
}
#endif
// get total epsilon -- performing sums on gpu
// get total epsilon -- performing sums on gpu
kReduceMutualInducedFieldDelta_kernel
<<<
1
,
amoebaGpu
->
epsilonThreadsPerBlock
,
2
*
sizeof
(
float
)
*
amoebaGpu
->
epsilonThreadsPerBlock
>>>
(
kReduceMutualInducedFieldDelta_kernel
<<<
1
,
amoebaGpu
->
epsilonThreadsPerBlock
,
2
*
sizeof
(
float
)
*
amoebaGpu
->
epsilonThreadsPerBlock
>>>
(
...
@@ -679,9 +699,11 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -679,9 +699,11 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
amoebaGpu
->
psCurrentEpsilon
->
_pDevData
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
LAUNCHERROR
(
"kReducePmeMutualInducedFieldDelta"
);
#ifdef AMOEBA_DEBUG
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
if
(
0
&&
amoebaGpu
->
log
){
// trackMutualInducedIterations
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
trackMutualInducedIterations
(
amoebaGpu
,
iteration
);
}
}
#endif
// Debye=48.033324f
// Debye=48.033324f
amoebaGpu
->
psCurrentEpsilon
->
Download
();
amoebaGpu
->
psCurrentEpsilon
->
Download
();
...
@@ -697,8 +719,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -697,8 +719,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipole
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
amoebaGpu
->
psInducedDipolePolar
->
Download
();
#if 1
#if 1
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
%s
iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
cudaComputeAmoebaPmeMutualInducedFieldBySOR
iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
methodName
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
#else
#else
...
@@ -765,17 +787,15 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -765,17 +787,15 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"MI iteration=%3d eps %14.6e done=%d
\n
"
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
iteration
,
amoebaGpu
->
mutualInducedCurrentEpsilon
,
done
);
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
1
],
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psCurrentEpsilon
->
_pSysData
[
2
],
done
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
}
#endif
#endif
// exit if nan
// exit if nan
if
(
0
&&
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
if
(
amoebaGpu
->
mutualInducedCurrentEpsilon
!=
amoebaGpu
->
mutualInducedCurrentEpsilon
){
(
void
)
fprintf
(
stderr
,
"PME MI iteration=%3d eps is nan -- exiting.
\n
"
,
iteration
);
(
void
)
fprintf
(
stderr
,
"PME MI iteration=%3d eps is nan -- exiting.
\n
"
,
iteration
);
exit
(
0
);
exit
(
0
);
}
}
...
@@ -786,6 +806,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -786,6 +806,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedDone
=
done
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
amoebaGpu
->
mutualInducedConverged
=
(
!
done
||
iteration
>
amoebaGpu
->
mutualInducedMaxIterations
)
?
0
:
1
;
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
std
::
vector
<
int
>
fileId
;
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
//fileId.push_back( 0 );
...
@@ -801,6 +822,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
...
@@ -801,6 +822,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"CudaPmeMI"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipole
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"CudaPmeMI"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"CudaPmeMIPolar"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psInducedDipolePolar
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"CudaPmeMIPolar"
,
stderr
);
}
}
#endif
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRotateFrame.cu
View file @
4a1e9683
...
@@ -396,17 +396,22 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
...
@@ -396,17 +396,22 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks/atoms=%d
\n
"
,
methodName
,
numBlocks
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s: numBlocks/atoms=%d
\n
"
,
methodName
,
numBlocks
);
(
void
)
fflush
(
amoebaGpu
->
log
);
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
Download
();
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
Download
();
amoebaGpu
->
psMolecularDipole
->
Download
();
amoebaGpu
->
psMolecularDipole
->
Download
();
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
Download
();
gpu
->
psPosq4
->
Download
();
gpu
->
psPosq4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
int
mIndex
=
3
*
ii
;
int
mIndex
=
3
*
ii
;
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s
\n
"
,
ii
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%6d [%6d %6d %6d %6d] x[%16.9e %16.9e %16.9e] %s
[%6d %6d %6d %6d]
\n
"
,
ii
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
,
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
x
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
y
,
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
,
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
>
1
?
" XXX"
:
""
)
);
gpu
->
psPosq4
->
_pSysData
[
ii
].
z
,
(
amoebaGpu
->
psMultipoleParticlesIdsAndAxisType
->
_pSysData
[
ii
].
w
>
1
?
" XXX"
:
""
),
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
x
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
y
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
z
,
amoebaGpu
->
psMultipoleParticlesTorqueBufferIndices
->
_pSysData
[
ii
].
w
);
//if( ii == 30 )ii = gpu->natoms - 30;
//if( ii == 30 )ii = gpu->natoms - 30;
}
}
}
}
...
@@ -491,6 +496,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
...
@@ -491,6 +496,7 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
}
else
{
}
else
{
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpu
);
cudaComputeAmoebaPmeElectrostatic
(
amoebaGpu
);
}
}
}
}
#undef AMOEBA_DEBUG
#undef AMOEBA_DEBUG
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaUtilities.cu
View file @
4a1e9683
...
@@ -118,7 +118,7 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
...
@@ -118,7 +118,7 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
{
{
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kClearFields_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
384
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
*
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
kClearFields_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
3
*
gpu
->
sim
.
outputBuffers
,
amoebaGpu
->
psWorkArray_3_1
->
_pDevData
);
LAUNCHERROR
(
"kClearFields_3_1"
);
LAUNCHERROR
(
"kClearFields_3_1"
);
if
(
numberToClear
>
1
){
if
(
numberToClear
>
1
){
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaVdw14_7.cu
View file @
4a1e9683
...
@@ -534,10 +534,12 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -534,10 +534,12 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
Vdw14_7Particle
),
gpu
->
sharedMemoryPerBlock
),
maxThreads
);
}
}
#ifdef AMOEBA_DEBUG_PRINT
if
(
0
){
if
(
0
){
static
int
iteration
=
0
;
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"
\n\n
zCoordPreCopyVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"
\n\n
zCoordPreCopyVdw"
,
stderr
);
}
}
#endif
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CopyCoordinates
(
amoebaGpu
,
gpu
->
psPosq4
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
kCalculateAmoebaVdw14_7CoordinateReduction
(
amoebaGpu
,
amoebaGpu
->
psAmoebaVdwCoordinates
,
amoebaGpu
->
psAmoebaVdwCoordinates
);
...
@@ -725,22 +727,27 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -725,22 +727,27 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
}
}
#endif
#endif
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
static
int
iteration
=
0
;
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"PreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"PreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"zCoordPreVdw"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
gpu
->
psPosq4
,
gpu
->
psAtomIndex
->
_pSysData
,
iteration
,
"zCoordPreVdw"
,
stderr
);
}
}
#endif
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
kReduceVdw14_7
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
);
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
static
int
iteration
=
0
;
static
int
iteration
=
0
;
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_2
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"Vdw32"
,
stderr
);
checkForNans
(
gpu
->
natoms
,
3
,
amoebaGpu
->
psWorkArray_3_2
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"Vdw32"
,
stderr
);
}
}
#endif
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7Reduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
kCalculateAmoebaVdw14_7NonReduction
(
amoebaGpu
,
amoebaGpu
->
psWorkArray_3_2
,
amoebaGpu
->
gpuContext
->
psForce4
);
#ifdef AMOEBA_DEBUG
if
(
0
){
if
(
0
){
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
CUDAStream
<
float4
>*
psTempForce
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
,
1
,
"psTempForce"
);
...
@@ -761,6 +768,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
...
@@ -761,6 +768,7 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
static
int
iteration
=
0
;
static
int
iteration
=
0
;
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"VdwForce"
,
stderr
);
checkForNansFloat4
(
gpu
->
natoms
,
amoebaGpu
->
gpuContext
->
psForce4
,
gpu
->
psAtomIndex
->
_pSysData
,
++
iteration
,
"VdwForce"
,
stderr
);
}
}
#endif
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
delete
debugArray
;
delete
debugArray
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment