Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
92a338cf
Commit
92a338cf
authored
Oct 07, 2010
by
Peter Eastman
Browse files
Optimizations to PME direct space computation
parent
b20978e1
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
708 additions
and
738 deletions
+708
-738
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
...cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
+5
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
...a/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
+10
-6
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+527
-553
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+54
-58
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+30
-30
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+55
-61
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+26
-26
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
View file @
92a338cf
...
@@ -59,7 +59,8 @@ __device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsi
...
@@ -59,7 +59,8 @@ __device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsi
{
{
// coordinates & charge
// coordinates & charge
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
float4
posq
=
cSim
.
pPosq
[
atomI
];
sA
->
x
=
posq
.
x
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
...
@@ -79,8 +80,9 @@ __device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsi
...
@@ -79,8 +80,9 @@ __device__ static void loadFixedFieldShared( struct FixedFieldParticle* sA, unsi
sA
->
labFrameQuadrupole_YZ
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole_YZ
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole_ZZ
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
8
];
sA
->
labFrameQuadrupole_ZZ
=
cAmoebaSim
.
pLabFrameQuadrupole
[
atomI
*
9
+
8
];
sA
->
damp
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
;
float2
dampingFactorAndThole
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
];
sA
->
thole
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
;
sA
->
damp
=
dampingFactorAndThole
.
x
;
sA
->
thole
=
dampingFactorAndThole
.
y
;
#ifdef GK
#ifdef GK
sA
->
bornR
=
bornR
[
atomI
];
sA
->
bornR
=
bornR
[
atomI
];
#endif
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedParticle.h
View file @
92a338cf
...
@@ -23,6 +23,8 @@ struct MutualInducedParticle {
...
@@ -23,6 +23,8 @@ struct MutualInducedParticle {
float
fieldS
[
3
];
float
fieldS
[
3
];
float
fieldPolarS
[
3
];
float
fieldPolarS
[
3
];
#else
float
padding
;
#endif
#endif
#ifdef INCLUDE_MI_FIELD_BUFFERS
#ifdef INCLUDE_MI_FIELD_BUFFERS
...
@@ -35,10 +37,11 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
...
@@ -35,10 +37,11 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
{
{
// coordinates & charge
// coordinates & charge
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
float4
posq
=
cSim
.
pPosq
[
atomI
];
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
x
=
posq
.
x
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
y
=
posq
.
y
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
sA
->
z
=
posq
.
z
;
sA
->
q
=
posq
.
w
;
// dipole
// dipole
...
@@ -52,8 +55,9 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
...
@@ -52,8 +55,9 @@ __device__ static void loadMutualInducedShared( MutualInducedParticle* sA, unsig
sA
->
inducedDipolePolar
[
1
]
=
cAmoebaSim
.
pInducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
1
]
=
cAmoebaSim
.
pInducedDipolePolar
[
atomI
*
3
+
1
];
sA
->
inducedDipolePolar
[
2
]
=
cAmoebaSim
.
pInducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
inducedDipolePolar
[
2
]
=
cAmoebaSim
.
pInducedDipolePolar
[
atomI
*
3
+
2
];
sA
->
damp
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
;
float2
dampingFactorAndThole
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
];
sA
->
thole
=
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
;
sA
->
damp
=
dampingFactorAndThole
.
x
;
sA
->
thole
=
dampingFactorAndThole
.
y
;
#ifdef GK
#ifdef GK
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
92a338cf
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
92a338cf
...
@@ -29,7 +29,7 @@
...
@@ -29,7 +29,7 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
#elif (__CUDA_ARCH__ >= 1
3
0)
#elif (__CUDA_ARCH__ >= 1
2
0)
__launch_bounds__
(
128
,
1
)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
92a338cf
...
@@ -167,7 +167,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
...
@@ -167,7 +167,7 @@ __device__ void sumTempBuffer( FixedFieldParticle& atomI, FixedFieldParticle& at
}
}
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
__device__
void
calculateFixedFieldRealSpacePairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
dscale
,
float
pscale
,
float
fields
[
4
][
3
]
float
dscale
,
float
pscale
,
float
4
fields
[
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
,
float4
*
pullBack
#endif
#endif
...
@@ -192,20 +192,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -192,20 +192,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// calculate the error function damping terms
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn
[
4
];
bn
[
0
]
=
erfc
(
ralpha
)
/
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
[
1
]
=
(
bn
[
0
]
+
alsq2n
*
exp2a
)
/
r2
;
float
bn1
=
(
bn
0
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
[
2
]
=
(
3.0
f
*
bn
[
1
]
+
alsq2n
*
exp2a
)
/
r2
;
float
bn2
=
(
3.0
f
*
bn
1
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
[
3
]
=
(
5.0
f
*
bn
[
2
]
+
alsq2n
*
exp2a
)
/
r2
;
float
bn3
=
(
5.0
f
*
bn
2
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
// compute the error function scaled and unscaled terms
...
@@ -262,99 +261,96 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
...
@@ -262,99 +261,96 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
float
qkz
=
atomJ
.
labFrameQuadrupole_XZ
*
xr
+
atomJ
.
labFrameQuadrupole_YZ
*
yr
+
atomJ
.
labFrameQuadrupole_ZZ
*
zr
;
float
qkz
=
atomJ
.
labFrameQuadrupole_XZ
*
xr
+
atomJ
.
labFrameQuadrupole_YZ
*
yr
+
atomJ
.
labFrameQuadrupole_ZZ
*
zr
;
float
qkr
=
qkx
*
xr
+
qky
*
yr
+
qkz
*
zr
;
float
qkr
=
qkx
*
xr
+
qky
*
yr
+
qkz
*
zr
;
float
fim
[
3
],
fkm
[
3
];
float
fim0
=
-
xr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
float
fid
[
3
],
fkd
[
3
];
-
bn1
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
bn2
*
qkx
;
float
fip
[
3
],
fkp
[
3
];
fim
[
0
]
=
-
xr
*
(
bn
[
1
]
*
atomJ
.
q
-
bn
[
2
]
*
dkr
+
bn
[
3
]
*
qkr
)
-
bn
[
1
]
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
bn
[
2
]
*
qkx
;
f
im
[
1
]
=
-
yr
*
(
bn
[
1
]
*
atomJ
.
q
-
bn
[
2
]
*
dkr
+
bn
[
3
]
*
qkr
)
f
loat
fim1
=
-
yr
*
(
bn
1
*
atomJ
.
q
-
bn
2
*
dkr
+
bn
3
*
qkr
)
-
bn
[
1
]
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
bn
[
2
]
*
qky
;
-
bn
1
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
bn
2
*
qky
;
f
im
[
2
]
=
-
zr
*
(
bn
[
1
]
*
atomJ
.
q
-
bn
[
2
]
*
dkr
+
bn
[
3
]
*
qkr
)
f
loat
fim2
=
-
zr
*
(
bn
1
*
atomJ
.
q
-
bn
2
*
dkr
+
bn
3
*
qkr
)
-
bn
[
1
]
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
bn
[
2
]
*
qkz
;
-
bn
1
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
bn
2
*
qkz
;
f
km
[
0
]
=
xr
*
(
bn
[
1
]
*
atomI
.
q
+
bn
[
2
]
*
dir
+
bn
[
3
]
*
qir
)
f
loat
fkm0
=
xr
*
(
bn
1
*
atomI
.
q
+
bn
2
*
dir
+
bn
3
*
qir
)
-
bn
[
1
]
*
atomI
.
labFrameDipole_X
-
2.0
f
*
bn
[
2
]
*
qix
;
-
bn
1
*
atomI
.
labFrameDipole_X
-
2.0
f
*
bn
2
*
qix
;
f
km
[
1
]
=
yr
*
(
bn
[
1
]
*
atomI
.
q
+
bn
[
2
]
*
dir
+
bn
[
3
]
*
qir
)
f
loat
fkm1
=
yr
*
(
bn
1
*
atomI
.
q
+
bn
2
*
dir
+
bn
3
*
qir
)
-
bn
[
1
]
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
bn
[
2
]
*
qiy
;
-
bn
1
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
bn
2
*
qiy
;
f
km
[
2
]
=
zr
*
(
bn
[
1
]
*
atomI
.
q
+
bn
[
2
]
*
dir
+
bn
[
3
]
*
qir
)
f
loat
fkm2
=
zr
*
(
bn
1
*
atomI
.
q
+
bn
2
*
dir
+
bn
3
*
qir
)
-
bn
[
1
]
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
bn
[
2
]
*
qiz
;
-
bn
1
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
bn
2
*
qiz
;
f
id
[
0
]
=
-
xr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
f
loat
fid0
=
-
xr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
drr5
*
qkx
;
-
drr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
drr5
*
qkx
;
f
id
[
1
]
=
-
yr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
f
loat
fid1
=
-
yr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
drr5
*
qky
;
-
drr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
drr5
*
qky
;
f
id
[
2
]
=
-
zr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
f
loat
fid2
=
-
zr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
drr5
*
qkz
;
-
drr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
drr5
*
qkz
;
f
kd
[
0
]
=
xr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
f
loat
fkd0
=
xr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
drr5
*
qix
;
-
drr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
drr5
*
qix
;
f
kd
[
1
]
=
yr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
f
loat
fkd1
=
yr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
drr5
*
qiy
;
-
drr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
drr5
*
qiy
;
f
kd
[
2
]
=
zr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
f
loat
fkd2
=
zr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
drr5
*
qiz
;
-
drr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
drr5
*
qiz
;
f
ip
[
0
]
=
-
xr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
f
loat
fip0
=
-
xr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
prr5
*
qkx
;
-
prr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
prr5
*
qkx
;
f
ip
[
1
]
=
-
yr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
f
loat
fip1
=
-
yr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
prr5
*
qky
;
-
prr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
prr5
*
qky
;
f
ip
[
2
]
=
-
zr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
f
loat
fip2
=
-
zr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
prr5
*
qkz
;
-
prr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
prr5
*
qkz
;
f
kp
[
0
]
=
xr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
f
loat
fkp0
=
xr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
prr5
*
qix
;
-
prr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
prr5
*
qix
;
f
kp
[
1
]
=
yr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
f
loat
fkp1
=
yr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
prr5
*
qiy
;
-
prr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
prr5
*
qiy
;
f
kp
[
2
]
=
zr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
f
loat
fkp2
=
zr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
prr5
*
qiz
;
-
prr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
prr5
*
qiz
;
// increment the field at each site due to this interaction
// increment the field at each site due to this interaction
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
fields
[
0
]
[
0
]
=
fim
[
0
]
-
fid
[
0
]
;
fields
[
0
]
.
x
=
fim
0
-
fid
0
;
fields
[
0
][
1
]
=
fim
[
1
]
-
fid
[
1
]
;
fields
[
1
]
.
x
=
fim
1
-
fid
1
;
fields
[
0
][
2
]
=
fim
[
2
]
-
fid
[
2
]
;
fields
[
2
]
.
x
=
fim
2
-
fid
2
;
fields
[
1
][
0
]
=
fkm
[
0
]
-
fkd
[
0
]
;
fields
[
0
]
.
y
=
fkm
0
-
fkd
0
;
fields
[
1
]
[
1
]
=
fkm
[
1
]
-
fkd
[
1
]
;
fields
[
1
]
.
y
=
fkm
1
-
fkd
1
;
fields
[
1
][
2
]
=
fkm
[
2
]
-
fkd
[
2
]
;
fields
[
2
]
.
y
=
fkm
2
-
fkd
2
;
fields
[
2
][
0
]
=
fim
[
0
]
-
fip
[
0
]
;
fields
[
0
]
.
z
=
fim
0
-
fip
0
;
fields
[
2
][
1
]
=
fim
[
1
]
-
fip
[
1
]
;
fields
[
1
]
.
z
=
fim
1
-
fip
1
;
fields
[
2
]
[
2
]
=
fim
[
2
]
-
fip
[
2
]
;
fields
[
2
]
.
z
=
fim
2
-
fip
2
;
fields
[
3
][
0
]
=
fkm
[
0
]
-
fkp
[
0
]
;
fields
[
0
]
.
w
=
fkm
0
-
fkp
0
;
fields
[
3
][
1
]
=
fkm
[
1
]
-
fkp
[
1
]
;
fields
[
1
]
.
w
=
fkm
1
-
fkp
1
;
fields
[
3
][
2
]
=
fkm
[
2
]
-
fkp
[
2
]
;
fields
[
2
]
.
w
=
fkm
2
-
fkp
2
;
}
else
{
}
else
{
fields
[
0
]
[
0
]
=
0.0
f
;
fields
[
0
]
.
x
=
0.0
f
;
fields
[
1
][
0
]
=
0.0
f
;
fields
[
0
]
.
y
=
0.0
f
;
fields
[
2
][
0
]
=
0.0
f
;
fields
[
0
]
.
z
=
0.0
f
;
fields
[
3
][
0
]
=
0.0
f
;
fields
[
0
]
.
w
=
0.0
f
;
fields
[
0
][
1
]
=
0.0
f
;
fields
[
1
]
.
x
=
0.0
f
;
fields
[
1
]
[
1
]
=
0.0
f
;
fields
[
1
]
.
y
=
0.0
f
;
fields
[
2
][
1
]
=
0.0
f
;
fields
[
1
]
.
z
=
0.0
f
;
fields
[
3
][
1
]
=
0.0
f
;
fields
[
1
]
.
w
=
0.0
f
;
fields
[
0
][
2
]
=
0.0
f
;
fields
[
2
]
.
x
=
0.0
f
;
fields
[
1
][
2
]
=
0.0
f
;
fields
[
2
]
.
y
=
0.0
f
;
fields
[
2
]
[
2
]
=
0.0
f
;
fields
[
2
]
.
z
=
0.0
f
;
fields
[
3
][
2
]
=
0.0
f
;
fields
[
2
]
.
w
=
0.0
f
;
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -441,7 +437,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
...
@@ -441,7 +437,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
if
(
gpu
->
sm_version
>=
SM_20
)
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
384
;
maxThreads
=
384
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
1
28
;
maxThreads
=
1
92
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
92a338cf
...
@@ -28,11 +28,11 @@
...
@@ -28,11 +28,11 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
384
,
1
)
#elif (__CUDA_ARCH__ >= 1
3
0)
#elif (__CUDA_ARCH__ >= 1
2
0)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
192
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaPmeDirectFixedE_Field
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
...
@@ -117,7 +117,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -117,7 +117,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
getMaskedPScaleFactor
(
j
,
pScaleMask
,
&
pScaleValue
);
getMaskedPScaleFactor
(
j
,
pScaleMask
,
&
pScaleValue
);
}
}
float
ijField
[
4
][
3
];
float
4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
j
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
...
@@ -131,13 +131,13 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
...
@@ -131,13 +131,13 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
0
]
[
0
]
;
fieldSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
0
]
.
x
;
fieldSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
]
.
x
;
fieldSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
]
.
x
;
fieldPolarSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
0
]
+=
match
?
0
.
0
f
:
ijField
[
0
]
.
z
;
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
1
]
+=
match
?
0
.
0
f
:
ijField
[
1
]
.
z
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
]
[
2
]
;
fieldPolarSum
[
2
]
+=
match
?
0
.
0
f
:
ijField
[
2
]
.
z
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
if
(
atomI
==
targetAtom
||
targetAtom
==
(
y
+
j
)
){
...
@@ -234,7 +234,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
...
@@ -234,7 +234,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
&
pScaleValue
);
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
&
pScaleValue
);
}
}
float
ijField
[
4
][
3
];
float
4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
...
@@ -245,35 +245,35 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
...
@@ -245,35 +245,35 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
[
0
]
;
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
x
;
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
1
];
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
x
;
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
][
2
];
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
x
;
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
0
];
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
z
;
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
][
1
];
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
z
;
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
[
2
]
;
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
z
;
if
(
flags
==
0xFFFFFFFF
){
if
(
flags
==
0xFFFFFFFF
){
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA
[
jIdx
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
psA
[
jIdx
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
y
;
psA
[
jIdx
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
[
1
]
;
psA
[
jIdx
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
y
;
psA
[
jIdx
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
psA
[
jIdx
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
y
;
psA
[
jIdx
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
w
;
psA
[
jIdx
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
w
;
psA
[
jIdx
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
w
;
}
else
{
}
else
{
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
[
1
]
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
y
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
]
.
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
]
.
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
]
.
w
;
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
92a338cf
...
@@ -55,7 +55,7 @@ __device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedPartic
...
@@ -55,7 +55,7 @@ __device__ void sumTempBuffer( MutualInducedParticle& atomI, MutualInducedPartic
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
__device__
void
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
__device__
void
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
MutualInducedParticle
&
atomI
,
MutualInducedParticle
&
atomJ
,
float
uscale
,
float
fields
[
4
][
3
]
float
uscale
,
float
4
fields
[
3
]
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
float4
*
pullBack
,
float4
*
pullBack
#endif
#endif
...
@@ -80,17 +80,16 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
...
@@ -80,17 +80,16 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// calculate the error function damping terms
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn
[
3
];
bn
[
0
]
=
erfc
(
ralpha
)
/
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
[
1
]
=
(
bn
[
0
]
+
alsq2n
*
exp2a
)
/
r2
;
float
bn1
=
(
bn
0
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
alsq2n
*=
alsq2
;
bn
[
2
]
=
(
3.0
f
*
bn
[
1
]
+
alsq2n
*
exp2a
)
/
r2
;
float
bn2
=
(
3.0
f
*
bn
1
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
// compute the error function scaled and unscaled terms
...
@@ -124,81 +123,76 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
...
@@ -124,81 +123,76 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
fimd
[
3
],
fkmd
[
3
];
bn1
*=
-
1.0
f
;
float
fimp
[
3
],
fkmp
[
3
];
float
fid
[
3
],
fkd
[
3
];
float
fip
[
3
],
fkp
[
3
];
bn
[
1
]
*=
-
1.0
f
;
float
fimd0
=
bn1
*
atomJ
.
inducedDipole
[
0
]
+
bn2
*
dukr
*
xr
;
float
fimd1
=
bn1
*
atomJ
.
inducedDipole
[
1
]
+
bn2
*
dukr
*
yr
;
float
fimd2
=
bn1
*
atomJ
.
inducedDipole
[
2
]
+
bn2
*
dukr
*
zr
;
f
imd
[
0
]
=
bn
[
1
]
*
atom
J
.
inducedDipole
[
0
]
+
bn
[
2
]
*
du
k
r
*
xr
;
f
loat
fkmd0
=
bn
1
*
atom
I
.
inducedDipole
[
0
]
+
bn
2
*
du
i
r
*
xr
;
f
imd
[
1
]
=
bn
[
1
]
*
atom
J
.
inducedDipole
[
1
]
+
bn
[
2
]
*
du
k
r
*
yr
;
f
loat
fkmd1
=
bn
1
*
atom
I
.
inducedDipole
[
1
]
+
bn
2
*
du
i
r
*
yr
;
f
imd
[
2
]
=
bn
[
1
]
*
atom
J
.
inducedDipole
[
2
]
+
bn
[
2
]
*
du
k
r
*
zr
;
f
loat
fkmd2
=
bn
1
*
atom
I
.
inducedDipole
[
2
]
+
bn
2
*
du
i
r
*
zr
;
f
kmd
[
0
]
=
bn
[
1
]
*
atom
I
.
inducedDipole
[
0
]
+
bn
[
2
]
*
dui
r
*
xr
;
f
loat
fimp0
=
bn
1
*
atom
J
.
inducedDipole
Polar
[
0
]
+
bn2
*
puk
r
*
xr
;
f
kmd
[
1
]
=
bn
[
1
]
*
atom
I
.
inducedDipole
[
1
]
+
bn
[
2
]
*
dui
r
*
yr
;
f
loat
fimp1
=
bn
1
*
atom
J
.
inducedDipole
Polar
[
1
]
+
bn2
*
puk
r
*
yr
;
f
kmd
[
2
]
=
bn
[
1
]
*
atom
I
.
inducedDipole
[
2
]
+
bn
[
2
]
*
dui
r
*
zr
;
f
loat
fimp2
=
bn
1
*
atom
J
.
inducedDipole
Polar
[
2
]
+
bn2
*
puk
r
*
zr
;
fimp
[
0
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
pukr
*
xr
;
float
fkmp0
=
bn1
*
atomI
.
inducedDipolePolar
[
0
]
+
bn2
*
puir
*
xr
;
fimp
[
1
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
pukr
*
yr
;
float
fkmp1
=
bn1
*
atomI
.
inducedDipolePolar
[
1
]
+
bn2
*
puir
*
yr
;
fimp
[
2
]
=
bn
[
1
]
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
pukr
*
zr
;
float
fkmp2
=
bn1
*
atomI
.
inducedDipolePolar
[
2
]
+
bn2
*
puir
*
zr
;
fkmp
[
0
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
0
]
+
bn
[
2
]
*
puir
*
xr
;
fkmp
[
1
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
1
]
+
bn
[
2
]
*
puir
*
yr
;
fkmp
[
2
]
=
bn
[
1
]
*
atomI
.
inducedDipolePolar
[
2
]
+
bn
[
2
]
*
puir
*
zr
;
rr3
*=
-
1.0
f
;;
rr3
*=
-
1.0
f
;;
f
id
[
0
]
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
f
loat
fid0
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
f
id
[
1
]
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
f
loat
fid1
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
f
id
[
2
]
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
f
loat
fid2
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
f
kd
[
0
]
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
f
loat
fkd0
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
f
kd
[
1
]
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
f
loat
fkd1
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
f
kd
[
2
]
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
f
loat
fkd2
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
f
ip
[
0
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
f
loat
fip0
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
f
ip
[
1
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
f
loat
fip1
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
f
ip
[
2
]
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
f
loat
fip2
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
f
kp
[
0
]
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
f
loat
fkp0
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
f
kp
[
1
]
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
f
loat
fkp1
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
f
kp
[
2
]
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
f
loat
fkp2
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
// increment the field at each site due to this interaction
// increment the field at each site due to this interaction
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
fields
[
0
]
[
0
]
=
fimd
[
0
]
-
fid
[
0
]
;
fields
[
0
]
.
x
=
fimd
0
-
fid
0
;
fields
[
1
][
0
]
=
fkmd
[
0
]
-
fkd
[
0
]
;
fields
[
0
]
.
y
=
fkmd
0
-
fkd
0
;
fields
[
2
][
0
]
=
fimp
[
0
]
-
fip
[
0
]
;
fields
[
0
]
.
z
=
fimp
0
-
fip
0
;
fields
[
3
][
0
]
=
fkmp
[
0
]
-
fkp
[
0
]
;
fields
[
0
]
.
w
=
fkmp
0
-
fkp
0
;
fields
[
0
][
1
]
=
fimd
[
1
]
-
fid
[
1
]
;
fields
[
1
]
.
x
=
fimd
1
-
fid
1
;
fields
[
1
]
[
1
]
=
fkmd
[
1
]
-
fkd
[
1
]
;
fields
[
1
]
.
y
=
fkmd
1
-
fkd
1
;
fields
[
2
][
1
]
=
fimp
[
1
]
-
fip
[
1
]
;
fields
[
1
]
.
z
=
fimp
1
-
fip
1
;
fields
[
3
][
1
]
=
fkmp
[
1
]
-
fkp
[
1
]
;
fields
[
1
]
.
w
=
fkmp
1
-
fkp
1
;
fields
[
0
][
2
]
=
fimd
[
2
]
-
fid
[
2
]
;
fields
[
2
]
.
x
=
fimd
2
-
fid
2
;
fields
[
1
][
2
]
=
fkmd
[
2
]
-
fkd
[
2
]
;
fields
[
2
]
.
y
=
fkmd
2
-
fkd
2
;
fields
[
2
]
[
2
]
=
fimp
[
2
]
-
fip
[
2
]
;
fields
[
2
]
.
z
=
fimp
2
-
fip
2
;
fields
[
3
][
2
]
=
fkmp
[
2
]
-
fkp
[
2
]
;
fields
[
2
]
.
w
=
fkmp
2
-
fkp
2
;
}
else
{
}
else
{
fields
[
0
]
[
0
]
=
0.0
f
;
fields
[
0
]
.
x
=
0.0
f
;
fields
[
1
][
0
]
=
0.0
f
;
fields
[
0
]
.
y
=
0.0
f
;
fields
[
2
][
0
]
=
0.0
f
;
fields
[
0
]
.
z
=
0.0
f
;
fields
[
3
][
0
]
=
0.0
f
;
fields
[
0
]
.
w
=
0.0
f
;
fields
[
0
][
1
]
=
0.0
f
;
fields
[
1
]
.
x
=
0.0
f
;
fields
[
1
]
[
1
]
=
0.0
f
;
fields
[
1
]
.
y
=
0.0
f
;
fields
[
2
][
1
]
=
0.0
f
;
fields
[
1
]
.
z
=
0.0
f
;
fields
[
3
][
1
]
=
0.0
f
;
fields
[
1
]
.
w
=
0.0
f
;
fields
[
0
][
2
]
=
0.0
f
;
fields
[
2
]
.
x
=
0.0
f
;
fields
[
1
][
2
]
=
0.0
f
;
fields
[
2
]
.
y
=
0.0
f
;
fields
[
2
]
[
2
]
=
0.0
f
;
fields
[
2
]
.
z
=
0.0
f
;
fields
[
3
][
2
]
=
0.0
f
;
fields
[
2
]
.
w
=
0.0
f
;
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
pullBack
[
0
].
x
=
xr
;
pullBack
[
0
].
x
=
xr
;
...
@@ -207,8 +201,8 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
...
@@ -207,8 +201,8 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
pullBack
[
0
].
w
=
r2
;
pullBack
[
0
].
w
=
r2
;
pullBack
[
1
].
x
=
alsq2
;
pullBack
[
1
].
x
=
alsq2
;
pullBack
[
1
].
y
=
bn
[
0
]
;
pullBack
[
1
].
y
=
bn
0
;
pullBack
[
1
].
z
=
bn
[
2
]
;
pullBack
[
1
].
z
=
bn
2
;
pullBack
[
1
].
w
=
exp2a
;
pullBack
[
1
].
w
=
exp2a
;
/*
/*
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
92a338cf
...
@@ -100,7 +100,7 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
...
@@ -100,7 +100,7 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
float
ijField
[
4
][
3
];
float
4
ijField
[
3
];
// load coords, charge, ...
// load coords, charge, ...
...
@@ -114,13 +114,13 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
...
@@ -114,13 +114,13 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
// add to field at atomI the field due atomJ's dipole
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
]
[
0
]
:
0
.
0
f
;
fieldSum
[
0
]
+=
mask
?
ijField
[
0
]
.
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
]
.
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
]
.
x
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
]
.
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
]
.
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
]
[
2
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
]
.
z
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
if
(
atomI
==
targetAtom
||
(
y
+
j
)
==
targetAtom
){
...
@@ -233,7 +233,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -233,7 +233,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
{
{
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float
ijField
[
4
][
3
];
float
4
ijField
[
3
];
// load coords, charge, ...
// load coords, charge, ...
...
@@ -247,39 +247,39 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -247,39 +247,39 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// add to field at atomI the field due atomJ's dipole
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
]
[
0
]
:
0
.
0
f
;
fieldSum
[
0
]
+=
mask
?
ijField
[
0
]
.
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
0
][
1
]
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
]
.
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
0
][
2
]
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
]
.
x
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
2
][
0
]
:
0
.
0
f
;
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
]
.
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
2
][
1
]
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
]
.
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
]
[
2
]
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
]
.
z
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
// add to field at atomJ the field due atomI's dipole
if
(
flags
==
0xFFFFFFFF
){
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
1
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
0
]
.
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
]
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
]
.
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
1
][
2
]
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
2
]
.
y
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
// add to polar field at atomJ the field due atomI's dipole
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
3
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
0
]
.
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
3
][
1
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
1
]
.
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
3
][
2
]
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
2
]
.
w
:
0
.
0
f
;
}
else
{
}
else
{
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
]
.
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
]
[
1
]
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
]
.
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
1
][
2
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
]
.
y
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
]
.
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
1
];
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
]
.
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
3
][
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
]
.
w
;
if
(
tgx
%
2
==
0
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment