Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
35c974f6
Commit
35c974f6
authored
Oct 07, 2010
by
Peter Eastman
Browse files
Optimizations to PME
parent
92a338cf
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
224 additions
and
229 deletions
+224
-229
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+0
-11
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+79
-77
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+71
-69
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+74
-72
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
35c974f6
...
@@ -1018,18 +1018,7 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
...
@@ -1018,18 +1018,7 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
*/
*/
void
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpuContext
amoebaGpu
)
void
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpuContext
amoebaGpu
)
{
{
// Perform PME for the induced dipoles.
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kGridSpreadInducedDipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
256
:
(
gpu
->
sm_version
>=
SM_12
?
128
:
64
));
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
cudaComputeAmoebaMapTorquesAndAddTotalForce2
(
amoebaGpu
,
amoebaGpu
->
psTorque
,
gpu
->
psForce4
);
cudaComputeAmoebaMapTorquesAndAddTotalForce2
(
amoebaGpu
,
amoebaGpu
->
psTorque
,
gpu
->
psForce4
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
35c974f6
...
@@ -298,92 +298,94 @@ if( atomI == targetAtom ){
...
@@ -298,92 +298,94 @@ if( atomI == targetAtom ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
unsigned
int
atomJ
=
y
+
jIdx
;
float
force
[
3
];
float
torque
[
2
][
3
];
// set scale factors
if
(
bExclusionFlag
)
{
{
getMaskedDScaleFactor
(
jIdx
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
unsigned
int
atomJ
=
y
+
jIdx
;
getMaskedMScaleFactor
(
jIdx
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
}
float
force
[
3
];
float
torque
[
2
][
3
];
// force
// set scale factors
float
energy
;
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
if
(
bExclusionFlag
)
scalingFactors
,
force
,
torque
,
&
energy
{
getMaskedDScaleFactor
(
jIdx
,
dScaleMask
,
scalingFactors
+
DScaleIndex
);
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
scalingFactors
+
PScaleIndex
);
getMaskedMScaleFactor
(
jIdx
,
mScaleMask
,
scalingFactors
+
MScaleIndex
);
}
// force
float
energy
;
calculatePmeDirectElectrostaticPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
scalingFactors
,
force
,
torque
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
#endif
#endif
);
);
// check if atoms out-of-bounds
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add force and torque to atom I due atom J
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
// check if atoms out-of-bounds
if
(
flags
==
0xFFFFFFFF
){
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
(
atomJ
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
psA
[
jIdx
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
// add force and torque to atom I due atom J
psA
[
jIdx
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
{
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
// add force and torque to atom J due atom I
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
force
[
0
]
-=
mask
?
force
[
0
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
][
0
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
][
1
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
][
2
]
:
0
.
0
f
;
}
else
{
sA
[
threadIdx
.
x
].
tempForce
[
0
]
=
mask
?
0
.
0
f
:
force
[
0
];
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
][
0
];
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
][
1
];
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
][
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
force
[
0
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
0
];
psA
[
jIdx
].
force
[
1
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
1
];
psA
[
jIdx
].
force
[
2
]
-=
sA
[
threadIdx
.
x
].
tempForce
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempForce
[
2
];
psA
[
jIdx
].
torque
[
0
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
0
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
0
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
1
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
1
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
psA
[
jIdx
].
torque
[
2
]
+=
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempTorque
[
2
];
}
}
}
}
}
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
View file @
35c974f6
...
@@ -228,77 +228,78 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
...
@@ -228,77 +228,78 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
){
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
if
(
bExclusionFlag
){
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
getMaskedDScaleFactor
(
jIdx
,
dScaleMask
,
&
dScaleValue
);
if
(
bExclusionFlag
){
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
&
pScaleValue
);
getMaskedDScaleFactor
(
jIdx
,
dScaleMask
,
&
dScaleValue
);
}
getMaskedPScaleFactor
(
jIdx
,
pScaleMask
,
&
pScaleValue
);
}
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
jIdx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
x
;
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
x
;
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
x
;
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
z
;
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
z
;
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
z
;
if
(
flags
==
0xFFFFFFFF
){
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
float4
ijField
[
3
];
calculateFixedFieldRealSpacePairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
dScaleValue
,
pScaleValue
,
ijField
psA
[
jIdx
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
#ifdef AMOEBA_DEBUG
psA
[
jIdx
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
,
pullBack
psA
[
jIdx
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
#endif
);
psA
[
jIdx
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
psA
[
jIdx
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
unsigned
int
outOfBounds
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
jIdx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
1
:
0
;
psA
[
jIdx
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
}
else
{
fieldSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
x
;
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
fieldSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
x
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
fieldSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
x
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
fieldPolarSum
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
z
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
fieldPolarSum
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
z
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
fieldPolarSum
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
z
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
if
(
flags
==
0xFFFFFFFF
){
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
// add to field at atomJ the field due atomI's charge/dipole/quadrupole
}
if
(
tgx
%
4
==
0
){
psA
[
jIdx
].
eField
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
psA
[
jIdx
].
eField
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
}
psA
[
jIdx
].
eField
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
psA
[
jIdx
].
eFieldP
[
0
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
}
psA
[
jIdx
].
eFieldP
[
1
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
if
(
tgx
%
16
==
0
){
psA
[
jIdx
].
eFieldP
[
2
]
+=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
}
else
{
if
(
tgx
==
0
)
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
y
;
{
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
y
;
psA
[
jIdx
].
eField
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
y
;
psA
[
jIdx
].
eField
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
0
].
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
1
].
w
;
psA
[
jIdx
].
eFieldP
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
outOfBounds
?
0
.
0
f
:
ijField
[
2
].
w
;
psA
[
jIdx
].
eFieldP
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
eField
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
eField
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
eField
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
eFieldP
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
eFieldP
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
eFieldP
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
)
){
if
(
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
)
){
...
@@ -358,6 +359,7 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
...
@@ -358,6 +359,7 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
}
}
}
}
#endif
#endif
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
// j-loop block
}
// j-loop block
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
35c974f6
...
@@ -231,81 +231,82 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
...
@@ -231,81 +231,82 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
{
float4
ijField
[
3
];
unsigned
int
jIdx
=
(
flags
==
0xFFFFFFFF
)
?
tj
:
j
;
float4
ijField
[
3
];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
);
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
jIdx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
0
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
2
].
y
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
0
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
1
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
2
].
w
:
0
.
0
f
;
}
else
{
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
].
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
].
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
].
y
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
].
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
].
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
].
w
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
// load coords, charge, ...
{
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
calculatePmeDirectMutualInducedFieldPairIxn_kernel
(
localParticle
,
psA
[
jIdx
],
uscale
,
ijField
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
#ifdef AMOEBA_DEBUG
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
,
pullBack
}
#endif
);
}
unsigned
int
mask
=
(
(
atomI
>=
cAmoebaSim
.
numberOfAtoms
)
||
((
y
+
jIdx
)
>=
cAmoebaSim
.
numberOfAtoms
)
)
?
0
:
1
;
// add to field at atomI the field due atomJ's dipole
fieldSum
[
0
]
+=
mask
?
ijField
[
0
].
x
:
0
.
0
f
;
fieldSum
[
1
]
+=
mask
?
ijField
[
1
].
x
:
0
.
0
f
;
fieldSum
[
2
]
+=
mask
?
ijField
[
2
].
x
:
0
.
0
f
;
// add to polar field at atomI the field due atomJ's dipole
fieldPolarSum
[
0
]
+=
mask
?
ijField
[
0
].
z
:
0
.
0
f
;
fieldPolarSum
[
1
]
+=
mask
?
ijField
[
1
].
z
:
0
.
0
f
;
fieldPolarSum
[
2
]
+=
mask
?
ijField
[
2
].
z
:
0
.
0
f
;
// add to field at atomJ the field due atomI's dipole
if
(
flags
==
0xFFFFFFFF
){
psA
[
jIdx
].
field
[
0
]
+=
mask
?
ijField
[
0
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
1
]
+=
mask
?
ijField
[
1
].
y
:
0
.
0
f
;
psA
[
jIdx
].
field
[
2
]
+=
mask
?
ijField
[
2
].
y
:
0
.
0
f
;
// add to polar field at atomJ the field due atomI's dipole
psA
[
jIdx
].
fieldPolar
[
0
]
+=
mask
?
ijField
[
0
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
1
]
+=
mask
?
ijField
[
1
].
w
:
0
.
0
f
;
psA
[
jIdx
].
fieldPolar
[
2
]
+=
mask
?
ijField
[
2
].
w
:
0
.
0
f
;
}
else
{
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
].
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
].
y
;
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
].
y
;
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
=
mask
?
0
.
0
f
:
ijField
[
0
].
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
=
mask
?
0
.
0
f
:
ijField
[
1
].
w
;
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
=
mask
?
0
.
0
f
:
ijField
[
2
].
w
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
}
if
(
tgx
%
4
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
2
]
);
}
if
(
tgx
%
8
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
4
]
);
}
if
(
tgx
%
16
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
8
]
);
}
if
(
tgx
==
0
)
{
psA
[
jIdx
].
field
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
0
];
psA
[
jIdx
].
field
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
1
];
psA
[
jIdx
].
field
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBuffer
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBuffer
[
2
];
psA
[
jIdx
].
fieldPolar
[
0
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
0
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
0
];
psA
[
jIdx
].
fieldPolar
[
1
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
1
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
1
];
psA
[
jIdx
].
fieldPolar
[
2
]
+=
sA
[
threadIdx
.
x
].
tempBufferP
[
2
]
+
sA
[
threadIdx
.
x
+
16
].
tempBufferP
[
2
];
}
}
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
){
if
(
atomI
==
targetAtom
||
(
y
+
jIdx
)
==
targetAtom
){
...
@@ -359,6 +360,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
...
@@ -359,6 +360,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
debugArray
[
index
].
w
=
flag
;
debugArray
[
index
].
w
=
flag
;
}
}
#endif
#endif
}
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment