Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
21ab511a
Commit
21ab511a
authored
Aug 09, 2010
by
Peter Eastman
Browse files
Optimizations to Kirkwood kernel
parent
f08750f2
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
738 additions
and
875 deletions
+738
-875
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+704
-815
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
...platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
+34
-60
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
21ab511a
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
View file @
21ab511a
...
@@ -60,13 +60,6 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -60,13 +60,6 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
// pWorkArray_1_1 == dBorn
// pWorkArray_1_1 == dBorn
// pWorkArray_1_2 == dBornPolar
// pWorkArray_1_2 == dBornPolar
float4
jCoord
;
float
jDipole
[
3
];
float
jQuadrupole
[
9
];
float
jInducedDipole
[
3
];
float
jInducedDipolePolar
[
3
];
float
jBornRadius
;
float
energySum
=
0
.
0
f
;
float
energySum
=
0
.
0
f
;
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -87,20 +80,21 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -87,20 +80,21 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
KirkwoodParticle
*
psA
=
&
sA
[
tbx
];
KirkwoodParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
cSim
.
pPosq
[
atomI
];
KirkwoodParticle
localParticle
;
loadKirkwoodShared
(
&
localParticle
,
atomI
,
cSim
.
pPosq
,
cAmoebaSim
.
pLabFrameDipole
,
cAmoebaSim
.
pLabFrameQuadrupole
,
cAmoebaSim
.
pInducedDipoleS
,
cAmoebaSim
.
pInducedDipolePolarS
,
cSim
.
pBornRadii
);
float
forceSum
[
3
];
float
torqueSum
[
3
];
float
dBornSum
;
float
dBornSum
;
float
dBornPolarSum
;
float
dBornPolarSum
;
force
Sum
[
0
]
=
0
.
0
f
;
localParticle
.
force
[
0
]
=
0
.
0
f
;
force
Sum
[
1
]
=
0
.
0
f
;
localParticle
.
force
[
1
]
=
0
.
0
f
;
force
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
force
[
2
]
=
0
.
0
f
;
torque
Sum
[
0
]
=
0
.
0
f
;
localParticle
.
torque
[
0
]
=
0
.
0
f
;
torque
Sum
[
1
]
=
0
.
0
f
;
localParticle
.
torque
[
1
]
=
0
.
0
f
;
torque
Sum
[
2
]
=
0
.
0
f
;
localParticle
.
torque
[
2
]
=
0
.
0
f
;
dBornSum
=
0
.
0
f
;
dBornSum
=
0
.
0
f
;
dBornPolarSum
=
0
.
0
f
;
dBornPolarSum
=
0
.
0
f
;
...
@@ -131,18 +125,8 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -131,18 +125,8 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
unsigned
int
atomJ
=
y
+
j
;
unsigned
int
atomJ
=
y
+
j
;
unsigned
int
sameAtom
=
atomI
==
atomJ
?
1
:
0
;
unsigned
int
sameAtom
=
atomI
==
atomJ
?
1
:
0
;
// load coords, charge, ...
calculateKirkwoodPairIxn_kernel
(
localParticle
,
psA
[
j
],
sameAtom
,
loadKirkwoodData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
&
jBornRadius
);
calculateKirkwoodPairIxn_kernel
(
sameAtom
,
iCoord
,
jCoord
,
&
(
cAmoebaSim
.
pLabFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
cAmoebaSim
.
pLabFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
cAmoebaSim
.
pInducedDipoleS
[
3
*
atomI
]),
jInducedDipole
,
&
(
cAmoebaSim
.
pInducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolar
,
cSim
.
pBornRadii
[
atomI
],
jBornRadius
,
force
,
torque
,
dBorn
,
dBornPolar
,
&
energy
force
,
torque
,
dBorn
,
dBornPolar
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
...
@@ -153,9 +137,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -153,9 +137,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
// torques include i == j contribution
// torques include i == j contribution
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
dBornSum
+=
mask
?
dBorn
[
0
]
:
0
.
0
f
;
dBornSum
+=
mask
?
dBorn
[
0
]
:
0
.
0
f
;
dBornPolarSum
+=
mask
?
dBornPolar
[
0
]
:
0
.
0
f
;
dBornPolarSum
+=
mask
?
dBornPolar
[
0
]
:
0
.
0
f
;
...
@@ -165,9 +149,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
...
@@ -165,9 +149,9 @@ void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
mask
=
(
atomI
==
atomJ
)
?
0
:
mask
;
mask
=
(
atomI
==
atomJ
)
?
0
:
mask
;
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -238,8 +222,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -238,8 +222,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
offset
*=
3
;
offset
*=
3
;
load3dArrayBufferPerWarp
(
offset
,
force
Sum
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
force
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArrayBufferPerWarp
(
offset
,
torque
Sum
,
cAmoebaSim
.
pWorkArray_3_2
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_2
);
#else
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
;
...
@@ -249,8 +233,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -249,8 +233,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
offset
*=
3
;
offset
*=
3
;
load3dArray
(
offset
,
force
Sum
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArray
(
offset
,
localParticle
.
force
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArray
(
offset
,
torque
Sum
,
cAmoebaSim
.
pWorkArray_3_2
);
load3dArray
(
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_2
);
#endif
#endif
...
@@ -286,18 +270,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -286,18 +270,8 @@ if( atomI == targetAtom || atomJ == targetAtom ){
unsigned
int
atomJ
=
y
+
tj
;
unsigned
int
atomJ
=
y
+
tj
;
unsigned
int
sameAtom
=
0
;
unsigned
int
sameAtom
=
0
;
// load coords, charge, ...
calculateKirkwoodPairIxn_kernel
(
localParticle
,
psA
[
tj
],
sameAtom
,
loadKirkwoodData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
jInducedDipole
,
jInducedDipolePolar
,
&
jBornRadius
);
calculateKirkwoodPairIxn_kernel
(
sameAtom
,
iCoord
,
jCoord
,
&
(
cAmoebaSim
.
pLabFrameDipole
[
3
*
atomI
]),
jDipole
,
&
(
cAmoebaSim
.
pLabFrameQuadrupole
[
9
*
atomI
]),
jQuadrupole
,
&
(
cAmoebaSim
.
pInducedDipoleS
[
3
*
atomI
]),
jInducedDipole
,
&
(
cAmoebaSim
.
pInducedDipolePolarS
[
3
*
atomI
]),
jInducedDipolePolar
,
cSim
.
pBornRadii
[
atomI
],
jBornRadius
,
force
,
torque
,
dBorn
,
dBornPolar
,
&
energy
force
,
torque
,
dBorn
,
dBornPolar
,
&
energy
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
,
pullBack
,
pullBack
...
@@ -308,13 +282,13 @@ if( atomI == targetAtom || atomJ == targetAtom ){
...
@@ -308,13 +282,13 @@ if( atomI == targetAtom || atomJ == targetAtom ){
// add force and torque to atom I due atom J
// add force and torque to atom I due atom J
force
Sum
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
localParticle
.
force
[
0
]
+=
mask
?
force
[
0
]
:
0
.
0
f
;
force
Sum
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
force
Sum
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
torque
Sum
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
][
0
]
:
0
.
0
f
;
torque
Sum
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
][
1
]
:
0
.
0
f
;
torque
Sum
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
][
2
]
:
0
.
0
f
;
dBornSum
+=
mask
?
dBorn
[
0
]
:
0
.
0
f
;
dBornSum
+=
mask
?
dBorn
[
0
]
:
0
.
0
f
;
dBornPolarSum
+=
mask
?
dBornPolar
[
0
]
:
0
.
0
f
;
dBornPolarSum
+=
mask
?
dBornPolar
[
0
]
:
0
.
0
f
;
...
@@ -410,8 +384,8 @@ if( mask || !mask ){
...
@@ -410,8 +384,8 @@ if( mask || !mask ){
offset
*=
3
;
offset
*=
3
;
load3dArrayBufferPerWarp
(
offset
,
force
Sum
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
force
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArrayBufferPerWarp
(
offset
,
torque
Sum
,
cAmoebaSim
.
pWorkArray_3_2
);
load3dArrayBufferPerWarp
(
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_2
);
offset
=
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
warp
*
cAmoebaSim
.
paddedNumberOfAtoms
;
...
@@ -435,8 +409,8 @@ if( mask || !mask ){
...
@@ -435,8 +409,8 @@ if( mask || !mask ){
offset
*=
3
;
offset
*=
3
;
load3dArray
(
offset
,
force
Sum
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArray
(
offset
,
localParticle
.
force
,
cAmoebaSim
.
pWorkArray_3_1
);
load3dArray
(
offset
,
torque
Sum
,
cAmoebaSim
.
pWorkArray_3_2
);
load3dArray
(
offset
,
localParticle
.
torque
,
cAmoebaSim
.
pWorkArray_3_2
);
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cAmoebaSim
.
paddedNumberOfAtoms
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment