Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
705ec545
Commit
705ec545
authored
Jun 30, 2009
by
Peter Eastman
Browse files
Optimizations to Ewald summation (in progress)
parent
530e4ea0
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
92 additions
and
112 deletions
+92
-112
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+1
-1
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+44
-41
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
+40
-60
platforms/cuda/src/kernels/kLangevinUpdate.h
platforms/cuda/src/kernels/kLangevinUpdate.h
+1
-1
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
...ms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+6
-9
No files found.
platforms/cuda/src/kernels/gpu.cpp
View file @
705ec545
...
...
@@ -432,7 +432,7 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
atoms
*
gpu
->
sim
.
kmax
,
1
,
"EwaldEikr"
);
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
atoms
*
gpu
->
sim
.
kmax
*
3
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEwaldEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
((
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
),
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
...
...
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
705ec545
...
...
@@ -24,25 +24,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/* Define multiply operations for floats */
/* Define multiply operations for floats */
__device__
float2
MultofFloat2
(
float2
a
,
float2
b
)
{
__device__
float2
MultofFloat2
(
float2
a
,
float2
b
)
{
float2
c
;
c
.
x
=
a
.
x
*
b
.
x
-
a
.
y
*
b
.
y
;
c
.
y
=
a
.
x
*
b
.
y
+
a
.
y
*
b
.
x
;
return
c
;
}
}
__device__
float2
ConjMultofFloat2
(
float2
a
,
float2
b
)
{
__device__
float2
ConjMultofFloat2
(
float2
a
,
float2
b
)
{
float2
c
;
c
.
x
=
a
.
x
*
b
.
x
+
a
.
y
*
b
.
y
;
c
.
y
=
a
.
y
*
b
.
x
-
a
.
x
*
b
.
y
;
return
c
;
}
}
#define EIR(x, y, z) cSim.pEwaldEikr[(x)+(y)*cSim.kmax+(z)*cSim.kmax*3]
__global__
void
kCalculateEwaldFastEikr_kernel
()
{
...
...
@@ -63,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel()
// k = 0, explicitly
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
0
+
m
]
.
x
=
1
;
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
0
+
m
]
.
y
=
0
;
EIR
(
atom
,
0
,
m
)
.
x
=
1
;
EIR
(
atom
,
0
,
m
)
.
y
=
0
;
}
// k = 1, explicitly
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
0
]
.
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
0
]
.
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
EIR
(
atom
,
1
,
0
)
.
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
EIR
(
atom
,
1
,
0
)
.
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
1
]
.
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
1
]
.
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
EIR
(
atom
,
1
,
1
)
.
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
EIR
(
atom
,
1
,
1
)
.
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
2
]
.
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
2
]
.
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
EIR
(
atom
,
1
,
2
)
.
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
EIR
(
atom
,
1
,
2
)
.
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pEwaldEikr
[
atom
*
kmax
*
3
+
3
+
m
]
);
EIR
(
atom
,
k
,
m
)
=
MultofFloat2
(
EIR
(
atom
,
k
-
1
,
m
),
EIR
(
atom
,
1
,
m
)
);
}
}
...
...
@@ -102,11 +102,11 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
float2
sum
=
make_float2
(
0
.
0
f
,
0
.
0
f
);
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
{
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
]
)
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]
));
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
EIR
(
atom
,
rx
,
0
)
,
EIR
(
atom
,
ry
,
1
)
)
:
ConjMultofFloat2
(
EIR
(
atom
,
rx
,
0
)
,
EIR
(
atom
,
-
ry
,
1
)
));
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
]
)
:
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]
));
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
EIR
(
atom
,
rz
,
2
)
)
:
ConjMultofFloat2
(
tab_xy
,
EIR
(
atom
,
-
rz
,
2
)
));
sum
.
x
+=
charge
*
structureFactor
.
x
;
sum
.
y
+=
charge
*
structureFactor
.
y
;
}
...
...
@@ -132,29 +132,32 @@ __global__ void kCalculateEwaldFastForces_kernel()
while
(
atom
<
cSim
.
atoms
)
{
float4
force
=
cSim
.
pForce4
[
atom
];
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
]
)
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]
));
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
EIR
(
atom
,
rx
,
0
)
,
EIR
(
atom
,
ry
,
1
)
)
:
ConjMultofFloat2
(
EIR
(
atom
,
rx
,
0
)
,
EIR
(
atom
,
-
ry
,
1
)
));
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
int
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
])
:
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]));
float
dEdR
=
ak
*
charge
*
(
cSim
.
pEwaldCosSinSum
[
index
].
x
*
structureFactor
.
y
-
cSim
.
pEwaldCosSinSum
[
index
].
y
*
structureFactor
.
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
EIR
(
atom
,
rz
,
2
))
:
ConjMultofFloat2
(
tab_xy
,
EIR
(
atom
,
-
rz
,
2
)));
float2
cosSinSum
=
cSim
.
pEwaldCosSinSum
[
index
];
float
dEdR
=
ak
*
charge
*
(
cosSinSum
.
x
*
structureFactor
.
y
-
cosSinSum
.
y
*
structureFactor
.
x
);
force
.
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
force
.
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
force
.
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
cSim
.
pForce4
[
atom
]
=
force
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
View file @
705ec545
...
...
@@ -32,74 +32,54 @@
__global__
void
kCalculateCDLJEwaldReciprocalForces_kernel
()
{
float
eps0
=
5.72765E-4
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
int
lowry
,
lowrz
;
float
kx
,
ky
,
kz
,
k2
,
ek
;
float
Qi
,
Qj
,
SinI
,
SinJ
,
CosI
,
CosJ
;
float
V
=
cSim
.
cellVolume
;
float4
apos1
,
apos2
;
// i nteracting atoms
float4
af
;
// atomic force
const
float
eps0
=
1
.
0
f
/
(
4
.
0
f
*
3
.
1415926535
f
*
cSim
.
epsfac
);
unsigned
int
atomID1
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atomID1
<
cSim
.
atoms
)
{
apos1
=
cSim
.
pPosq
[
atomID1
];
af
=
cSim
.
pForce4
[
atomID1
];
float4
apos1
=
cSim
.
pPosq
[
atomID1
];
float4
af
=
cSim
.
pForce4
[
atomID1
];
unsigned
int
atomID2
=
0
;
while
(
atomID2
<
cSim
.
atoms
)
{
apos2
=
cSim
.
pPosq
[
atomID2
];
lowry
=
0
;
lowrz
=
1
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
ky
=
ry
*
cSim
.
recipBoxSizeY
;
float4
apos2
=
cSim
.
pPosq
[
atomID2
];
float
scale
=
2
.
0
f
*
apos1
.
w
*
apos2
.
w
/
(
cSim
.
cellVolume
*
eps0
);
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
int
lowry
=
0
;
int
lowrz
=
1
;
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
ek
=
exp
(
k2
*
cSim
.
factorEwald
);
Qi
=
apos1
.
w
;
Qj
=
apos2
.
w
;
SinI
=
sin
(
kx
*
apos1
.
x
+
ky
*
apos1
.
y
+
kz
*
apos1
.
z
);
SinJ
=
sin
(
kx
*
apos2
.
x
+
ky
*
apos2
.
y
+
kz
*
apos2
.
z
);
CosI
=
cos
(
kx
*
apos1
.
x
+
ky
*
apos1
.
y
+
kz
*
apos1
.
z
);
CosJ
=
cos
(
kx
*
apos2
.
x
+
ky
*
apos2
.
y
+
kz
*
apos2
.
z
);
af
.
x
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kx
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
af
.
y
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
ky
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
af
.
z
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kz
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
lowrz
=
1
-
numRz
;
for
(
int
rx
=
0
;
rx
<
cSim
.
kmax
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
cSim
.
kmax
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
for
(
int
rz
=
lowrz
;
rz
<
cSim
.
kmax
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ek
=
exp
(
k2
*
cSim
.
factorEwald
);
float
arg1
=
kx
*
apos1
.
x
+
ky
*
apos1
.
y
+
kz
*
apos1
.
z
;
float
arg2
=
kx
*
apos2
.
x
+
ky
*
apos2
.
y
+
kz
*
apos2
.
z
;
float
sinI
=
sin
(
arg1
);
float
sinJ
=
sin
(
arg2
);
float
cosI
=
cos
(
arg1
);
float
cosJ
=
cos
(
arg2
);
float
f
=
scale
*
ek
*
(
-
sinI
*
cosJ
+
cosI
*
sinJ
)
/
k2
;
af
.
x
-=
kx
*
f
;
af
.
y
-=
ky
*
f
;
af
.
z
-=
kz
*
f
;
lowrz
=
1
-
cSim
.
kmax
;
}
lowry
=
1
-
numRy
;
lowry
=
1
-
cSim
.
kmax
;
}
}
atomID2
++
;
}
cSim
.
pForce4
[
atomID1
]
=
af
;
atomID1
+=
blockDim
.
x
*
gridDim
.
x
;
...
...
platforms/cuda/src/kernels/kLangevinUpdate.h
View file @
705ec545
...
...
@@ -155,7 +155,7 @@ __global__ void kLangevinUpdatePart2_kernel()
CM
.
x
+=
mass
*
velocity
.
x
;
CM
.
y
+=
mass
*
velocity
.
y
;
CM
.
z
+=
mass
*
velocity
.
z
;
#endif
;
#endif
Xmh
.
x
=
vVector
.
x
*
cSim
.
TauDOverEMMinusOne
+
sqrtInvMass
*
random4b
.
x
;
...
...
platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
View file @
705ec545
...
...
@@ -36,6 +36,8 @@
// make sure that erf() and erfc() are defined.
#include "MSVC_erfc.h"
using
std
::
vector
;
/**---------------------------------------------------------------------------------------
ReferenceLJCoulombIxn constructor
...
...
@@ -279,9 +281,9 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
d_complex
*
eir
=
new
d_complex
[
kmax
*
numberOfAtoms
*
3
]
;
d_complex
*
tab_xy
=
new
d_complex
[
numberOfAtoms
]
;
d_complex
*
tab_qxyz
=
new
d_complex
[
numberOfAtoms
]
;
vector
<
d_complex
>
eir
(
kmax
*
numberOfAtoms
*
3
)
;
vector
<
d_complex
>
tab_xy
(
numberOfAtoms
)
;
vector
<
d_complex
>
tab_qxyz
(
numberOfAtoms
)
;
if
(
kmax
<
1
)
{
std
::
stringstream
message
;
...
...
@@ -389,7 +391,7 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
// allocate and initialize exclusion array
int
*
exclusionIndices
=
new
int
[
numberOfAtoms
]
;
vector
<
int
>
exclusionIndices
(
numberOfAtoms
)
;
for
(
int
ii
=
0
;
ii
<
numberOfAtoms
;
ii
++
){
exclusionIndices
[
ii
]
=
-
1
;
}
...
...
@@ -428,11 +430,6 @@ int ReferenceLJCoulombIxn::calculateEwaldIxn( int numberOfAtoms, RealOpenMM** at
}
}
delete
[]
exclusionIndices
;
delete
[]
eir
;
delete
[]
tab_xy
;
delete
[]
tab_qxyz
;
// ***********************************************************************
if
(
totalEnergy
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment