Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
353529b1
Commit
353529b1
authored
May 21, 2009
by
Rossen Apostolov
Browse files
Preliminary implementation of faster (N^3/2) Ewald kernels in Cuda
parent
52c04559
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
269 additions
and
7 deletions
+269
-7
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+3
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+9
-0
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+3
-0
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+234
-0
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
+9
-7
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+11
-0
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
353529b1
...
@@ -335,6 +335,9 @@ struct cudaGmxSimulation {
...
@@ -335,6 +335,9 @@ struct cudaGmxSimulation {
float
collisionProbability
;
// Collision probability for Andersen thermostat
float
collisionProbability
;
// Collision probability for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pStructureFactor
;
// Pointer to the structure factors (ewald)
float2
*
pCosSinSum
;
// Pointer to the cos/sin sums (ewald)
unsigned
int
bonds
;
// Number of bonds
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
353529b1
...
@@ -1087,6 +1087,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
...
@@ -1087,6 +1087,12 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
pObcChain
=
gpu
->
psObcChain
->
_pDevStream
[
0
];
gpu
->
sim
.
pObcChain
=
gpu
->
psObcChain
->
_pDevStream
[
0
];
gpu
->
psSigEps2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SigEps2"
);
gpu
->
psSigEps2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SigEps2"
);
gpu
->
sim
.
pAttr
=
gpu
->
psSigEps2
->
_pDevStream
[
0
];
gpu
->
sim
.
pAttr
=
gpu
->
psSigEps2
->
_pDevStream
[
0
];
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldStructureFactor
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldStructureFactor"
);
gpu
->
sim
.
pStructureFactor
=
gpu
->
psEwaldStructureFactor
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
gpu
->
psObcData
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcData"
);
gpu
->
psObcData
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcData"
);
gpu
->
sim
.
pObcData
=
gpu
->
psObcData
->
_pDevStream
[
0
];
gpu
->
sim
.
pObcData
=
gpu
->
psObcData
->
_pDevStream
[
0
];
gpu
->
pAtomSymbol
=
new
unsigned
char
[
gpu
->
natoms
];
gpu
->
pAtomSymbol
=
new
unsigned
char
[
gpu
->
natoms
];
...
@@ -1486,6 +1492,9 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1486,6 +1492,9 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psxVector4
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psEwaldStructureFactor
;
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psObcData
;
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornForce
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
353529b1
...
@@ -86,6 +86,9 @@ struct _gpuContext {
...
@@ -86,6 +86,9 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psEwaldEikr
;
CUDAStream
<
float2
>*
psEwaldStructureFactor
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psBornForce
;
CUDAStream
<
float
>*
psBornForce
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
0 → 100644
View file @
353529b1
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Rossen P. Apostolov, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/* Define multiply operations for floats */
__device__
float2
MultofFloat2
(
float2
a
,
float2
b
)
{
float2
c
;
c
.
x
=
a
.
x
*
b
.
x
-
a
.
y
*
b
.
y
;
c
.
y
=
a
.
x
*
b
.
y
+
a
.
y
*
b
.
x
;
return
c
;
}
__device__
float2
ConjMultofFloat2
(
float2
a
,
float2
b
)
{
float2
c
;
c
.
x
=
a
.
x
*
b
.
x
+
a
.
y
*
b
.
y
;
c
.
y
=
a
.
y
*
b
.
x
-
a
.
x
*
b
.
y
;
return
c
;
}
__device__
float2
FloatMultFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
x
;
b
.
y
=
r
*
a
.
y
;
return
b
;
}
__device__
float2
FloatMultConjFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
y
;
b
.
y
=
r
*
a
.
x
;
return
b
;
}
__global__
void
kCalculateEwaldFastEikr_kernel
()
{
int
kmax
=
cSim
.
kmax
;
float4
apos
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
//generic form of the array
// pEikr[ atomID*kmax*3 + k*3 + m]
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
// k = 0, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
// k = 1, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
}
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
]);
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
{
// hard-coded maximum k-vectors, no interface yet
int
kmax
=
cSim
.
kmax
;
// float2 eikr;
float4
apos
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
float2
tab_xy
;
int
index
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
apos
=
cSim
.
pPosq
[
atom
];
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
// tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
else
{
// tab_xy[n] = EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
// next one is scary!
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
//tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * EIR(rz, n, 2));
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
}
else
{
// tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
cSim
.
pCosSinSum
[
index
].
x
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
cSim
.
pCosSinSum
[
index
].
y
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastForces_kernel
()
{
float
PI
=
3
.
14159265358979323846
f
;
// hard-coded maximum k-vectors, no interface yet
// int kmax = cSim.kmax;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
// float2 eikr;
// float4 apos;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
// apos = cSim.pPosq[atom];
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
// next one is scary!
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/cuda/src/kernels/kCalculateCDLJEwaldReciprocal.h
View file @
353529b1
...
@@ -32,7 +32,6 @@
...
@@ -32,7 +32,6 @@
__global__
void
kCalculateCDLJEwaldReciprocalForces_kernel
()
__global__
void
kCalculateCDLJEwaldReciprocalForces_kernel
()
{
{
float
alphaEwald
=
cSim
.
alphaEwald
;
float
eps0
=
5.72765E-4
;
float
eps0
=
5.72765E-4
;
int
numRx
=
20
+
1
;
int
numRx
=
20
+
1
;
...
@@ -46,13 +45,15 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
...
@@ -46,13 +45,15 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
float
V
=
cSim
.
cellVolume
;
float
V
=
cSim
.
cellVolume
;
float4
apos1
,
apos2
;
float4
apos1
,
apos2
;
// i nteracting atoms
float4
af
;
// atomic force
unsigned
int
atomID1
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
atomID1
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atomID1
<
cSim
.
stride
*
cSim
.
outputBuffer
s
)
while
(
atomID1
<
cSim
.
atom
s
)
{
{
apos1
=
cSim
.
pPosq
[
atomID1
];
apos1
=
cSim
.
pPosq
[
atomID1
];
af
=
cSim
.
pForce4
[
atomID1
];
unsigned
int
atomID2
=
0
;
unsigned
int
atomID2
=
0
;
while
(
atomID2
<
cSim
.
atoms
)
while
(
atomID2
<
cSim
.
atoms
)
...
@@ -85,9 +86,9 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
...
@@ -85,9 +86,9 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
CosI
=
cos
(
kx
*
apos1
.
x
+
ky
*
apos1
.
y
+
kz
*
apos1
.
z
);
CosI
=
cos
(
kx
*
apos1
.
x
+
ky
*
apos1
.
y
+
kz
*
apos1
.
z
);
CosJ
=
cos
(
kx
*
apos2
.
x
+
ky
*
apos2
.
y
+
kz
*
apos2
.
z
);
CosJ
=
cos
(
kx
*
apos2
.
x
+
ky
*
apos2
.
y
+
kz
*
apos2
.
z
);
cSim
.
pForce4
[
atomID1
]
.
x
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kx
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
af
.
x
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kx
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
cSim
.
pForce4
[
atomID1
]
.
y
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
ky
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
af
.
y
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
ky
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
cSim
.
pForce4
[
atomID1
]
.
z
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kz
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
af
.
z
-=
(
2
.
0
/
(
V
*
eps0
))
*
Qi
*
(
kz
/
k2
)
*
ek
*
(
-
SinI
*
Qj
*
CosJ
+
CosI
*
Qj
*
SinJ
);
lowrz
=
1
-
numRz
;
lowrz
=
1
-
numRz
;
}
}
...
@@ -98,7 +99,8 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
...
@@ -98,7 +99,8 @@ __global__ void kCalculateCDLJEwaldReciprocalForces_kernel()
atomID2
++
;
atomID2
++
;
}
}
cSim
.
pForce4
[
atomID1
]
=
af
;
atomID1
+=
blockDim
.
x
*
gridDim
.
x
;
atomID1
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
353529b1
...
@@ -119,6 +119,7 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
...
@@ -119,6 +119,7 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
// Reciprocal Space Ewald summation is in a separate kernel
// Reciprocal Space Ewald summation is in a separate kernel
#include "kCalculateCDLJEwaldReciprocal.h"
#include "kCalculateCDLJEwaldReciprocal.h"
#include "kCalculateCDLJEwaldFastReciprocal.h"
__global__
extern
void
kCalculateCDLJCutoffForces_12_kernel
();
__global__
extern
void
kCalculateCDLJCutoffForces_12_kernel
();
...
@@ -198,6 +199,7 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -198,6 +199,7 @@ void kCalculateCDLJForces(gpuContext gpu)
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
{
{
// Vanilla (N2) Ewald
kCalculateCDLJEwaldDirectByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldDirectByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
...
@@ -205,11 +207,20 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -205,11 +207,20 @@ void kCalculateCDLJForces(gpuContext gpu)
}
}
else
else
{
{
// Vanilla (N2) Ewald
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
// If using Fast Ewald, uncomment the lines below
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastEikr");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment