Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
408469c3
Commit
408469c3
authored
Oct 07, 2010
by
Peter Eastman
Browse files
Optimizations to PME
parent
45b0302d
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
258 additions
and
265 deletions
+258
-265
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+82
-87
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+20
-20
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+92
-93
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+63
-64
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
408469c3
...
...
@@ -152,15 +152,26 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
float
*
scalingFactors
,
float
*
outputForce
,
float
3
outputTorque
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
#endif
){
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
float
e
,
ei
;
float
erl
,
erli
;
xr
-=
floor
(
xr
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
...
...
@@ -184,22 +195,6 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
qi8
=
atomI
.
labFrameQuadrupole
[
7
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
8
];
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
xr
-=
floor
(
xr
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
...
...
@@ -497,18 +492,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// compute the energy contributions for this interaction
e
=
bn0
*
gl0
+
bn1
*
(
gl1
+
gl6
)
float
e
=
bn0
*
gl0
+
bn1
*
(
gl1
+
gl6
)
+
bn2
*
(
gl2
+
gl7
+
gl8
)
+
bn3
*
(
gl3
+
gl5
)
+
bn4
*
gl4
;
ei
=
0.5
f
*
(
bn1
*
(
gli1
+
gli6
)
float
ei
=
0.5
f
*
(
bn1
*
(
gli1
+
gli6
)
+
bn2
*
(
gli2
+
gli7
)
+
bn3
*
gli3
);
// get the real energy without any screening function
erl
=
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
float
erl
=
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
+
rr5
*
(
gl2
+
gl7
+
gl8
)
+
rr7
*
(
gl3
+
gl5
)
+
rr9
*
gl4
;
erli
=
0.5
f
*
(
rr3
*
(
gli1
+
gli6
)
*
psc3
float
erli
=
0.5
f
*
(
rr3
*
(
gli1
+
gli6
)
*
psc3
+
rr5
*
(
gli2
+
gli7
)
*
psc5
+
rr7
*
gli3
*
psc7
);
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
...
...
@@ -552,23 +547,23 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// intermediate variables for induced force terms
float
gfi1
=
0.5
f
*
bn2
*
(
gli1
+
glip1
+
gli6
+
glip6
)
+
0.5
f
*
bn2
*
scip2
+
0.5
f
*
bn3
*
(
gli2
+
glip2
+
gli7
+
glip7
)
-
0.5
f
*
bn3
*
(
sci3
*
scip4
+
scip3
*
sci4
)
+
0.5
f
*
bn4
*
(
gli3
+
glip3
);
float
gfi1
=
0.5
f
*
(
bn2
*
(
gli1
+
glip1
+
gli6
+
glip6
)
+
bn2
*
scip2
+
bn3
*
(
gli2
+
glip2
+
gli7
+
glip7
)
-
bn3
*
(
sci3
*
scip4
+
scip3
*
sci4
)
+
bn4
*
(
gli3
+
glip3
)
)
;
float
gfi2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
float
gfi3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
float
gfi4
=
2.0
f
*
bn2
;
float
gfi5
=
bn3
*
(
sci4
+
scip4
);
float
gfi6
=
-
bn3
*
(
sci3
+
scip3
);
float
gfri1
=
0.5
f
*
rr5
*
((
gli1
+
gli6
)
*
psc3
float
gfri1
=
0.5
f
*
(
rr5
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
usc3
)
+
0.5
f
*
rr7
*
((
gli7
+
gli2
)
*
psc5
+
rr7
*
((
gli7
+
gli2
)
*
psc5
+
(
glip7
+
glip2
)
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
usc5
)
+
0.5
f
*
rr9
*
(
gli3
*
psc7
+
glip3
*
dsc7
);
+
rr9
*
(
gli3
*
psc7
+
glip3
*
dsc7
)
)
;
float
gfri4
=
2.0
f
*
rr5
;
float
gfri5
=
rr7
*
(
sci4
*
psc7
+
scip4
*
dsc7
);
float
gfri6
=
-
rr7
*
(
sci3
*
psc7
+
scip3
*
dsc7
);
...
...
@@ -858,13 +853,13 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
outputForce
[
2
]
=
conversionFactor
*
(
ftm23
+
ftm2i3
);
conversionFactor
*=
-
1.0
;
outputTorque
[
0
]
[
0
]
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
outputTorque
[
0
][
1
]
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
outputTorque
[
0
][
2
]
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
outputTorque
[
0
]
.
x
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
outputTorque
[
1
]
.
x
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
outputTorque
[
2
]
.
x
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
outputTorque
[
1
]
[
0
]
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
outputTorque
[
1
]
[
1
]
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
outputTorque
[
1
]
[
2
]
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
outputTorque
[
1
]
.
x
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
outputTorque
[
1
]
.
y
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
outputTorque
[
1
]
.
z
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
...
...
@@ -959,13 +954,13 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
outputForce
[
1
]
=
0.0
f
;
outputForce
[
2
]
=
0.0
f
;
outputTorque
[
0
]
[
0
]
=
0.0
f
;
outputTorque
[
0
]
[
1
]
=
0.0
f
;
outputTorque
[
0
]
[
2
]
=
0.0
f
;
outputTorque
[
0
]
.
x
=
0.0
f
;
outputTorque
[
0
]
.
y
=
0.0
f
;
outputTorque
[
0
]
.
z
=
0.0
f
;
outputTorque
[
1
]
[
0
]
=
0.0
f
;
outputTorque
[
1
]
[
1
]
=
0.0
f
;
outputTorque
[
1
]
[
2
]
=
0.0
f
;
outputTorque
[
1
]
.
x
=
0.0
f
;
outputTorque
[
1
]
.
y
=
0.0
f
;
outputTorque
[
1
]
.
z
=
0.0
f
;
*
energy
=
0.0
f
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
408469c3
...
...
@@ -117,7 +117,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
{
float
force
[
3
];
float
torque
[
2
]
[
3
]
;
float
3
torque
[
2
];
unsigned
int
atomJ
=
y
+
j
;
...
...
@@ -151,9 +151,9 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
...
...
@@ -181,15 +181,15 @@ if( atomI == targetAtom ){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
energy
:
0
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
...
...
@@ -304,7 +304,7 @@ if( atomI == targetAtom ){
unsigned
int
atomJ
=
y
+
jIdx
;
float
force
[
3
];
float
torque
[
2
]
[
3
]
;
float
3
torque
[
2
];
// set scale factors
...
...
@@ -335,9 +335,9 @@ if( atomI == targetAtom ){
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
...
...
@@ -349,9 +349,9 @@ if( atomI == targetAtom ){
psA
[
jIdx
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
]
[
0
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
]
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
]
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
]
.
x
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
]
.
y
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
]
.
z
:
0
.
0
f
;
}
else
{
...
...
@@ -359,9 +359,9 @@ if( atomI == targetAtom ){
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
0
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
1
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
2
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
x
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
y
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
z
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
408469c3
...
...
@@ -187,6 +187,7 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
...
...
@@ -317,8 +318,6 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
// increment the field at each site due to this interaction
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
fields
[
0
].
x
=
fim0
-
fid0
;
fields
[
1
].
x
=
fim1
-
fid1
;
fields
[
2
].
x
=
fim2
-
fid2
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
408469c3
...
...
@@ -75,6 +75,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
...
...
@@ -160,8 +161,6 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
// increment the field at each site due to this interaction
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
fields
[
0
].
x
=
fimd0
-
fid0
;
fields
[
0
].
y
=
fkmd0
-
fkd0
;
fields
[
0
].
z
=
fimp0
-
fip0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
408469c3
...
...
@@ -29,7 +29,7 @@
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 1
3
0)
#elif (__CUDA_ARCH__ >= 1
2
0)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment