Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
9eb02774
Commit
9eb02774
authored
Jul 01, 2009
by
Peter Eastman
Browse files
Optimizations to Ewald summation
parent
cfafe7f2
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
54 additions
and
74 deletions
+54
-74
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+0
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+1
-7
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+1
-2
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+49
-59
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+3
-5
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
9eb02774
...
...
@@ -337,7 +337,6 @@ struct cudaGmxSimulation {
float
collisionProbability
;
// Collision probability for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pEwaldEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
9eb02774
...
...
@@ -432,8 +432,6 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
atoms
*
gpu
->
sim
.
kmax
*
3
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEwaldEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
((
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
),
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
}
...
...
@@ -1274,7 +1272,6 @@ void* gpuInit(int numAtoms)
gpu
->
psRbDihedralParameter2
=
NULL
;
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psLJ14Parameter
=
NULL
;
gpu
->
psEwaldEikr
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
...
...
@@ -1404,11 +1401,8 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
if
(
gpu
->
psEwaldEikr
!=
NULL
)
{
delete
gpu
->
psEwaldEikr
;
if
(
gpu
->
psEwaldCosSinSum
!=
NULL
)
delete
gpu
->
psEwaldCosSinSum
;
}
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
9eb02774
...
...
@@ -86,7 +86,6 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psEwaldEikr
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
9eb02774
...
...
@@ -24,6 +24,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernel for evaluating nonbonded forces using the
* Ewald summation method (Reciprocal space summation).
*/
/* Define multiply operations for floats */
__device__
float2
MultofFloat2
(
float2
a
,
float2
b
)
...
...
@@ -42,50 +47,9 @@ __device__ float2 ConjMultofFloat2(float2 a, float2 b)
return
c
;
}
/*
 * Accessor for the per-atom table of complex factors stored in cSim.pEwaldEikr.
 *
 *   x = atom index, y = wave-vector index k (0 <= k < cSim.kmax), z = axis m (0 = x, 1 = y, 2 = z)
 *
 * Layout: pEwaldEikr[atom*kmax*3 + k*3 + m]. Every (atom, k, m) triple maps to a
 * distinct element, and the largest index is atoms*kmax*3 - 1, which matches a
 * buffer of atoms*kmax*3 float2 entries.
 */
#define EIR(x, y, z) cSim.pEwaldEikr[((x)*cSim.kmax+(y))*3+(z)]

/**
 * Fill the EIR table for every atom.
 *
 * For each atom and each axis m the kernel stores
 *   k = 0 : (1, 0)
 *   k = 1 : (cos(r_m * recipBoxSize_m), sin(r_m * recipBoxSize_m))
 *   k > 1 : EIR(atom, k-1, m) combined with EIR(atom, 1, m) through MultofFloat2
 *           (presumably a complex product, giving the k-th power of the k = 1 entry -
 *           confirm against MultofFloat2's definition).
 *
 * Each thread starts at its global thread index and advances by the total number
 * of launched threads until all cSim.atoms atoms are processed, so any 1D launch
 * configuration covers every atom. No shared memory is used.
 */
__global__ void kCalculateEwaldFastEikr_kernel()
{
    const int kmax = cSim.kmax;

    // An empty table has no k = 0 slot to write.
    if (kmax < 1)
        return;

    unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;

    while (atom < cSim.atoms)
    {
        const float4 apos = cSim.pPosq[atom];

        // k = 0, explicitly: unit factor on every axis.
        for (unsigned int m = 0; m < 3; m++)
        {
            EIR(atom, 0, m) = make_float2(1.0f, 0.0f);
        }

        // The k = 1 slot exists only when the table holds at least two entries
        // per axis; writing it unconditionally would overrun this atom's slab.
        if (kmax > 1)
        {
            // k = 1, explicitly.
            const float phaseX = apos.x * cSim.recipBoxSizeX;
            const float phaseY = apos.y * cSim.recipBoxSizeY;
            const float phaseZ = apos.z * cSim.recipBoxSizeZ;
            EIR(atom, 1, 0) = make_float2(cosf(phaseX), sinf(phaseX));
            EIR(atom, 1, 1) = make_float2(cosf(phaseY), sinf(phaseY));
            EIR(atom, 1, 2) = make_float2(cosf(phaseZ), sinf(phaseZ));

            // k > 1, by recursion on the previous entry.
            for (int k = 2; k < kmax; k++)
            {
                for (unsigned int m = 0; m < 3; m++)
                {
                    EIR(atom, k, m) = MultofFloat2(EIR(atom, k - 1, m), EIR(atom, 1, m));
                }
            }
        }

        atom += blockDim.x * gridDim.x;
    }
}
/**
* Precompute the cosine and sine sums which appear in each force term.
*/
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
{
...
...
@@ -94,27 +58,42 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
index
<
totalK
)
{
// Find the wave vector (kx, ky, kz) this index corresponds to.
int
rx
=
index
/
(
ksize
*
ksize
);
int
remainder
=
index
-
rx
*
ksize
*
ksize
;
int
ry
=
remainder
/
ksize
;
int
rz
=
remainder
-
ry
*
ksize
-
cSim
.
kmax
+
1
;
ry
+=
-
cSim
.
kmax
+
1
;
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
// Compute the sum for this wave vector.
float2
sum
=
make_float2
(
0
.
0
f
,
0
.
0
f
);
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
{
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
EIR
(
atom
,
rx
,
0
),
EIR
(
atom
,
ry
,
1
))
:
ConjMultofFloat2
(
EIR
(
atom
,
rx
,
0
),
EIR
(
atom
,
-
ry
,
1
)));
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
EIR
(
atom
,
rz
,
2
))
:
ConjMultofFloat2
(
tab_xy
,
EIR
(
atom
,
-
rz
,
2
)));
sum
.
x
+=
charge
*
structureFactor
.
x
;
sum
.
y
+=
charge
*
structureFactor
.
y
;
float4
apos
=
cSim
.
pPosq
[
atom
];
float
phase
=
apos
.
x
*
kx
;
float2
structureFactor
=
make_float2
(
cos
(
phase
),
sin
(
phase
));
phase
=
apos
.
y
*
ky
;
structureFactor
=
MultofFloat2
(
structureFactor
,
make_float2
(
cos
(
phase
),
sin
(
phase
)));
phase
=
apos
.
z
*
kz
;
structureFactor
=
MultofFloat2
(
structureFactor
,
make_float2
(
cos
(
phase
),
sin
(
phase
)));
sum
.
x
+=
apos
.
w
*
structureFactor
.
x
;
sum
.
y
+=
apos
.
w
*
structureFactor
.
y
;
}
cSim
.
pEwaldCosSinSum
[
index
]
=
sum
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
/**
* Compute the reciprocal space part of the Ewald force, using the precomputed sums from the
* previous routine.
*/
__global__
void
kCalculateEwaldFastForces_kernel
()
{
...
...
@@ -122,8 +101,6 @@ __global__ void kCalculateEwaldFastForces_kernel()
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
cSim
.
epsfac
*
(
4
*
PI
/
cSim
.
cellVolume
/
epsilon
);
int
lowry
=
0
;
int
lowrz
=
1
;
const
int
numRx
=
cSim
.
kmax
;
const
int
numRy
=
cSim
.
kmax
;
const
int
numRz
=
cSim
.
kmax
;
...
...
@@ -133,22 +110,32 @@ __global__ void kCalculateEwaldFastForces_kernel()
while
(
atom
<
cSim
.
atoms
)
{
float4
force
=
cSim
.
pForce4
[
atom
];
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
float4
apos
=
cSim
.
pPosq
[
atom
];
// Loop over all wave vectors.
int
lowry
=
0
;
int
lowrz
=
1
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
EIR
(
atom
,
rx
,
0
),
EIR
(
atom
,
ry
,
1
))
:
ConjMultofFloat2
(
EIR
(
atom
,
rx
,
0
),
EIR
(
atom
,
-
ry
,
1
)));
float
phase
=
apos
.
x
*
kx
;
float2
tab_xy
=
make_float2
(
cos
(
phase
),
sin
(
phase
));
phase
=
apos
.
y
*
ky
;
tab_xy
=
MultofFloat2
(
tab_xy
,
make_float2
(
cos
(
phase
),
sin
(
phase
)));
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
// Compute the force contribution of this wave vector.
int
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
EIR
(
atom
,
rz
,
2
))
:
ConjMultofFloat2
(
tab_xy
,
EIR
(
atom
,
-
rz
,
2
)));
phase
=
apos
.
z
*
kz
;
float2
structureFactor
=
MultofFloat2
(
tab_xy
,
make_float2
(
cos
(
phase
),
sin
(
phase
)));
float2
cosSinSum
=
cSim
.
pEwaldCosSinSum
[
index
];
float
dEdR
=
ak
*
charge
*
(
cosSinSum
.
x
*
structureFactor
.
y
-
cosSinSum
.
y
*
structureFactor
.
x
);
float
dEdR
=
ak
*
apos
.
w
*
(
cosSinSum
.
x
*
structureFactor
.
y
-
cosSinSum
.
y
*
structureFactor
.
x
);
force
.
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
force
.
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
force
.
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
...
...
@@ -157,6 +144,9 @@ __global__ void kCalculateEwaldFastForces_kernel()
lowry
=
1
-
numRy
;
}
}
// Record the force on the atom.
cSim
.
pForce4
[
atom
]
=
force
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
9eb02774
...
...
@@ -212,10 +212,8 @@ void kCalculateCDLJForces(gpuContext gpu)
}
else
{
// O(N^(3/2)) Ewald summation
kCalculateEwaldFastEikr_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastEikr"
);
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
// Fast Ewald summation
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment