Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f11d445b
Commit
f11d445b
authored
Aug 09, 2010
by
Peter Eastman
Browse files
Optimizations to KirkwoodEDiff kernel
parent
21ab511a
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
288 additions
and
455 deletions
+288
-455
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+235
-337
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
+53
-118
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
View file @
f11d445b
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
View file @
f11d445b
...
...
@@ -63,14 +63,6 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jDipole
[
3
];
float
jQuadrupole
[
9
];
float
jInducedDipole
[
3
];
float
jInducedDipolePolar
[
3
];
float
jInducedDipoleS
[
3
];
float
jInducedDipolePolarS
[
3
];
float
totalEnergy
=
0
.
0
f
;
float
tinker_f
=
(
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
...
...
@@ -81,9 +73,6 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
unsigned
int
y
;
bool
bExclusionFlag
;
float
forceSum
[
3
];
float
torqueSum
[
3
];
float
force
[
3
];
float
torqueI
[
3
];
float
torqueJ
[
3
];
...
...
@@ -100,15 +89,20 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
KirkwoodEDiffParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
KirkwoodEDiffParticle
localParticle
;
loadKirkwoodEDiffShared
(
&
localParticle
,
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
inducedDipole
,
inducedDipolePolar
,
inducedDipoleS
,
inducedDipolePolarS
);
force
Sum
[
0
]
=
0
.
0
f
;
force
Sum
[
1
]
=
0
.
0
f
;
force
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
force
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
localParticle
.
force
[
2
]
=
0
.
0
f
;
torque
Sum
[
0
]
=
0
.
0
f
;
torque
Sum
[
1
]
=
0
.
0
f
;
torque
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
torque
[
0
]
=
0
.
0
f
;
localParticle
.
torque
[
1
]
=
0
.
0
f
;
localParticle
.
torque
[
2
]
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
...
...
@@ -135,22 +129,8 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
unsigned
int
atomJ
=
(
y
+
j
);
// load coords, charge, ...
loadKirkwoodEDiffData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
jInducedDipoleS
,
jInducedDipolePolarS
);
calculateKirkwoodEDiffPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
&
(
inducedDipoleS
[
3
*
atomI
]),
jInducedDipoleS
,
&
(
inducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolarS
,
calculateKirkwoodEDiffPairIxn_kernel
(
localParticle
,
psA
[
j
],
pScale
,
dScale
,
&
energy
,
force
,
torqueI
,
torqueJ
#ifdef AMOEBA_DEBUG
...
...
@@ -162,9 +142,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
// torques include i == j contribution
torque
Sum
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
...
...
@@ -172,9 +152,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwoodEDiff, Forces_kernel)(
mask
=
(
atomI
==
atomJ
)
?
0
:
mask
;
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
...
...
@@ -210,27 +190,12 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
atomJ
=
(
y
+
j
);
// load coords, charge, ...
loadKirkwoodEDiffData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
jInducedDipoleS
,
jInducedDipolePolarS
);
float
pScale
;
float
dScale
;
getMaskedDScaleFactor
(
j
,
dScaleMask
,
&
dScale
);
getMaskedPScaleFactor
(
j
,
pScaleMask
,
&
pScale
);
calculateKirkwoodEDiffPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
&
(
inducedDipoleS
[
3
*
atomI
]),
jInducedDipoleS
,
&
(
inducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolarS
,
calculateKirkwoodEDiffPairIxn_kernel
(
localParticle
,
psA
[
j
],
pScale
,
dScale
,
&
energy
,
force
,
torqueI
,
torqueJ
#ifdef AMOEBA_DEBUG
...
...
@@ -242,17 +207,17 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// torques include i == j contribution
torque
Sum
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
f
*
energy
:
0
.
0
f
;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
...
...
@@ -280,19 +245,19 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// scale and write results
scale3dArray
(
tinker_f
,
force
Sum
);
scale3dArray
(
tinker_f
,
torque
Sum
);
scale3dArray
(
tinker_f
,
localParticle
.
force
);
scale3dArray
(
tinker_f
,
localParticle
.
torque
);
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
force
Sum
,
outputForce
);
load3dArrayBufferPerWarp
(
offset
,
torque
Sum
,
outputTorque
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
force
,
outputForce
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
torque
,
outputTorque
);
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
force
Sum
,
outputForce
);
load3dArray
(
offset
,
torque
Sum
,
outputTorque
);
load3dArray
(
offset
,
localParticle
.
force
,
outputForce
);
load3dArray
(
offset
,
localParticle
.
torque
,
outputTorque
);
#endif
...
...
@@ -329,22 +294,7 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
// load coords, charge, ...
loadKirkwoodEDiffData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
jInducedDipoleS
,
jInducedDipolePolarS
);
calculateKirkwoodEDiffPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
&
(
inducedDipoleS
[
3
*
atomI
]),
jInducedDipoleS
,
&
(
inducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolarS
,
calculateKirkwoodEDiffPairIxn_kernel
(
localParticle
,
psA
[
tj
],
pScale
,
dScale
,
&
energy
,
force
,
torqueI
,
torqueJ
...
...
@@ -358,13 +308,13 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// add force and torque to atom I due atom J
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
...
...
@@ -413,27 +363,12 @@ if( atomI == targetAtom || atomJ == targetAtom ){
{
unsigned
int
atomJ
=
y
+
tj
;
// load coords, charge, ...
loadKirkwoodEDiffData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
jInducedDipoleS
,
jInducedDipolePolarS
);
float
dScale
;
float
pScale
;
getMaskedDScaleFactor
(
tj
,
dScaleMask
,
&
dScale
);
getMaskedPScaleFactor
(
tj
,
pScaleMask
,
&
pScale
);
calculateKirkwoodEDiffPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
labFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
inducedDipole
[
3
*
atomI
]),
jInducedDipole
,
&
(
inducedDipolePolar
[
3
*
atomI
]),
jInducedDipolePolar
,
&
(
inducedDipoleS
[
3
*
atomI
]),
jInducedDipoleS
,
&
(
inducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolarS
,
calculateKirkwoodEDiffPairIxn_kernel
(
localParticle
,
psA
[
tj
],
pScale
,
dScale
,
&
energy
,
force
,
torqueI
,
torqueJ
...
...
@@ -447,13 +382,13 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// add force and torque to atom I due atom J
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torqueI
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torqueI
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torqueI
[
2
]
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
...
...
@@ -492,8 +427,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// scale and write results
scale3dArray
(
tinker_f
,
force
Sum
);
scale3dArray
(
tinker_f
,
torque
Sum
);
scale3dArray
(
tinker_f
,
localParticle
.
force
);
scale3dArray
(
tinker_f
,
localParticle
.
torque
);
scale3dArray
(
tinker_f
,
sA
[
threadIdx
.
x
].
force
);
scale3dArray
(
tinker_f
,
sA
[
threadIdx
.
x
].
torque
);
...
...
@@ -502,8 +437,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
offset
=
3
*
(
x
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArrayBufferPerWarp
(
offset
,
force
Sum
,
outputForce
);
load3dArrayBufferPerWarp
(
offset
,
torque
Sum
,
outputTorque
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
force
,
outputForce
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
torque
,
outputTorque
);
offset
=
3
*
(
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
);
...
...
@@ -512,8 +447,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
#else
unsigned
int
offset
=
3
*
(
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
load3dArray
(
offset
,
force
Sum
,
outputForce
);
load3dArray
(
offset
,
torque
Sum
,
outputTorque
);
load3dArray
(
offset
,
localParticle
.
force
,
outputForce
);
load3dArray
(
offset
,
localParticle
.
torque
,
outputTorque
);
offset
=
3
*
(
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment