Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
96f3680d
Commit
96f3680d
authored
Jun 30, 2009
by
Peter Eastman
Browse files
Fixed CUDA implementation of O(n^3/2) Ewald
parent
86a4baba
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
90 additions
and
222 deletions
+90
-222
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+3
-4
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+20
-23
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-1
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+61
-181
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+6
-13
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
96f3680d
...
@@ -300,7 +300,7 @@ struct cudaGmxSimulation {
...
@@ -300,7 +300,7 @@ struct cudaGmxSimulation {
float
cellVolume
;
// Volume of the periodic box (periodicBoxSizeX * periodicBoxSizeY * periodicBoxSizeZ)
float
cellVolume
;
// Volume of the periodic box (periodicBoxSizeX * periodicBoxSizeY * periodicBoxSizeZ)
float
alphaEwald
;
// Ewald parameter alpha (a.k.a. kappa)
float
alphaEwald
;
// Ewald parameter alpha (a.k.a. kappa)
float
factorEwald
;
// -1 / (4 * alphaEwald * alphaEwald)
float
factorEwald
;
// -1 / (4 * alphaEwald * alphaEwald)
float
kmax
;
// Maximum number of reciprocal vectors
int
kmax
;
// Maximum number of reciprocal vectors
float
reactionFieldK
;
// Constant for reaction field correction
float
reactionFieldK
;
// Constant for reaction field correction
float
probeRadius
;
// SASA probe radius
float
probeRadius
;
// SASA probe radius
float
surfaceAreaFactor
;
// ACE approximation surface area factor
float
surfaceAreaFactor
;
// ACE approximation surface area factor
...
@@ -337,9 +337,8 @@ struct cudaGmxSimulation {
...
@@ -337,9 +337,8 @@ struct cudaGmxSimulation {
float
collisionProbability
;
// Collision probability for Andersen thermostat
float
collisionProbability
;
// Collision probability for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pEwaldEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pStructureFactor
;
// Pointer to the structure factors (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float2
*
pCosSinSum
;
// Pointer to the cos/sin sums (ewald)
unsigned
int
bonds
;
// Number of bonds
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
96f3680d
...
@@ -429,18 +429,13 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
...
@@ -429,18 +429,13 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
// hard coded alphaEwald and kmax, no interface yet
// hard coded alphaEwald and kmax, no interface yet
float
alpha
=
3.123413
f
;
float
alpha
=
3.123413
f
;
float
PI
=
3.14159265358979323846
f
;
gpu
->
sim
.
alphaEwald
=
alpha
;
float
TWO_PI
=
2.0
f
*
PI
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
gpu
->
sim
.
recipBoxSizeX
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeX
;
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
gpu
->
sim
.
kmax
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
recipBoxSizeY
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeY
;
gpu
->
sim
.
pEwaldEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
sim
.
recipBoxSizeZ
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
((
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
),
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
gpu
->
sim
.
cellVolume
=
gpu
->
sim
.
periodicBoxSizeX
*
gpu
->
sim
.
periodicBoxSizeY
*
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
}
}
extern
"C"
extern
"C"
...
@@ -449,6 +444,10 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
...
@@ -449,6 +444,10 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
gpu
->
sim
.
periodicBoxSizeX
=
xsize
;
gpu
->
sim
.
periodicBoxSizeX
=
xsize
;
gpu
->
sim
.
periodicBoxSizeY
=
ysize
;
gpu
->
sim
.
periodicBoxSizeY
=
ysize
;
gpu
->
sim
.
periodicBoxSizeZ
=
zsize
;
gpu
->
sim
.
periodicBoxSizeZ
=
zsize
;
gpu
->
sim
.
recipBoxSizeX
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeX
;
gpu
->
sim
.
recipBoxSizeY
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeY
;
gpu
->
sim
.
recipBoxSizeZ
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
sim
.
cellVolume
=
gpu
->
sim
.
periodicBoxSizeX
*
gpu
->
sim
.
periodicBoxSizeY
*
gpu
->
sim
.
periodicBoxSizeZ
;
}
}
extern
"C"
extern
"C"
...
@@ -1002,12 +1001,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
...
@@ -1002,12 +1001,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
pObcChain
=
gpu
->
psObcChain
->
_pDevStream
[
0
];
gpu
->
sim
.
pObcChain
=
gpu
->
psObcChain
->
_pDevStream
[
0
];
gpu
->
psSigEps2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SigEps2"
);
gpu
->
psSigEps2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SigEps2"
);
gpu
->
sim
.
pAttr
=
gpu
->
psSigEps2
->
_pDevStream
[
0
];
gpu
->
sim
.
pAttr
=
gpu
->
psSigEps2
->
_pDevStream
[
0
];
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldStructureFactor
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldStructureFactor"
);
gpu
->
sim
.
pStructureFactor
=
gpu
->
psEwaldStructureFactor
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
gpu
->
psObcData
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcData"
);
gpu
->
psObcData
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcData"
);
gpu
->
sim
.
pObcData
=
gpu
->
psObcData
->
_pDevStream
[
0
];
gpu
->
sim
.
pObcData
=
gpu
->
psObcData
->
_pDevStream
[
0
];
gpu
->
psStepSize
=
new
CUDAStream
<
float2
>
(
1
,
1
,
"StepSize"
);
gpu
->
psStepSize
=
new
CUDAStream
<
float2
>
(
1
,
1
,
"StepSize"
);
...
@@ -1281,6 +1274,8 @@ void* gpuInit(int numAtoms)
...
@@ -1281,6 +1274,8 @@ void* gpuInit(int numAtoms)
gpu
->
psRbDihedralParameter2
=
NULL
;
gpu
->
psRbDihedralParameter2
=
NULL
;
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psLJ14Parameter
=
NULL
;
gpu
->
psLJ14Parameter
=
NULL
;
gpu
->
psEwaldEikr
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psSettleID
=
NULL
;
gpu
->
psSettleID
=
NULL
;
...
@@ -1408,11 +1403,13 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1408,11 +1403,13 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psForce4
;
delete
gpu
->
psForce4
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psEwaldEikr
;
if
(
gpu
->
psEwaldEikr
!=
NULL
)
delete
gpu
->
psEwaldStructureFactor
;
{
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psObcData
;
delete
gpu
->
psEwaldCosSinSum
;
}
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornRadii
;
delete
gpu
->
psBornRadii
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
96f3680d
...
@@ -87,7 +87,6 @@ struct _gpuContext {
...
@@ -87,7 +87,6 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psEwaldEikr
;
CUDAStream
<
float2
>*
psEwaldEikr
;
CUDAStream
<
float2
>*
psEwaldStructureFactor
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
CUDAStream
<
float
>*
psObcChain
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
96f3680d
...
@@ -44,28 +44,10 @@
...
@@ -44,28 +44,10 @@
}
}
__device__
float2
FloatMultFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
x
;
b
.
y
=
r
*
a
.
y
;
return
b
;
}
__device__
float2
FloatMultConjFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
y
;
b
.
y
=
r
*
a
.
x
;
return
b
;
}
__global__
void
kCalculateEwaldFastEikr_kernel
()
__global__
void
kCalculateEwaldFastEikr_kernel
()
{
{
int
kmax
=
cSim
.
kmax
;
int
kmax
=
cSim
.
kmax
;
float4
apos
;
float4
apos
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
...
@@ -81,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -81,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel()
// k = 0, explicitly
// k = 0, explicitly
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
}
}
// k = 1, explicitly
// k = 1, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
// k > 1, by recursion
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
]);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
m
]);
}
}
}
}
...
@@ -105,176 +87,74 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -105,176 +87,74 @@ __global__ void kCalculateEwaldFastEikr_kernel()
}
}
}
}
__global__
void
kCalculateEwaldFastStructureFactors_kernel
()
{
// hard-coded maximum k-vectors, no interface yet
int
kmax
=
cSim
.
kmax
;
float4
apos
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
float2
tab_xy
;
int
index
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
apos
=
cSim
.
pPosq
[
atom
];
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
else
{
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
}
else
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
cSim
.
pCosSinSum
[
index
].
x
=
0
.
0
;
cSim
.
pCosSinSum
[
index
].
y
=
0
.
0
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
{
{
const
unsigned
int
ksize
=
2
*
cSim
.
kmax
-
1
;
// float2 eikr;
const
unsigned
int
totalK
=
ksize
*
ksize
*
ksize
;
int
lowry
=
0
;
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
int
lowrz
=
1
;
while
(
index
<
totalK
)
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
unsigned
int
rx
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
rx
<
numRx
)
{
{
// **********************************************************************
int
rx
=
index
/
(
ksize
*
ksize
);
int
remainder
=
index
-
rx
*
ksize
*
ksize
;
// cSim.pEikr[atom*kmax*3 + k*3 + m]
int
ry
=
remainder
/
ksize
;
int
rz
=
remainder
-
ry
*
ksize
-
cSim
.
kmax
+
1
;
// for(int rx = 0; rx < numRx; rx++) {
ry
+=
-
cSim
.
kmax
+
1
;
float2
sum
=
make_float2
(
0
.
0
f
,
0
.
0
f
);
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
{
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
])
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]));
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
])
:
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]));
{
sum
.
x
+=
charge
*
structureFactor
.
x
;
cSim
.
pCosSinSum
[
index
].
x
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
sum
.
y
+=
charge
*
structureFactor
.
y
;
cSim
.
pCosSinSum
[
index
].
y
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
}
lowrz
=
1
-
numRz
;
}
}
lowry
=
1
-
numRy
;
cSim
.
pEwaldCosSinSum
[
index
]
=
sum
;
}
index
+=
blockDim
.
x
*
gridDim
.
x
;
rx
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
__global__
void
kCalculateEwaldFastForces_kernel
()
__global__
void
kCalculateEwaldFastForces_kernel
()
{
{
float
PI
=
3
.
14159265358979323846
f
;
float
PI
=
3
.
14159265358979323846
f
;
const
float
epsilon
=
1
.
0
;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
float
recipCoeff
=
cSim
.
epsfac
*
(
4
*
PI
/
cSim
.
cellVolume
/
epsilon
);
int
lowry
=
0
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
const
int
numRx
=
cSim
.
kmax
;
int
numRy
=
20
+
1
;
const
int
numRy
=
cSim
.
kmax
;
int
numRz
=
20
+
1
;
const
int
numRz
=
cSim
.
kmax
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
])
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]));
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
int
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
])
:
// next one is scary!
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]));
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
dEdR
=
ak
*
charge
*
(
cSim
.
pEwaldCosSinSum
[
index
].
x
*
structureFactor
.
y
-
cSim
.
pEwaldCosSinSum
[
index
].
y
*
structureFactor
.
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
}
lowry
=
1
-
numRy
;
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
}
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
}
}
lowry
=
1
-
numRy
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
96f3680d
...
@@ -207,22 +207,15 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -207,22 +207,15 @@ void kCalculateCDLJForces(gpuContext gpu)
}
}
else
else
{
{
// Vanilla (N2) Ewald
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kCalculateEwaldFastEikr_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
LAUNCHERROR
(
"kCalculateEwaldFastEikr"
);
// If using Fast Ewald, uncomment the lines below
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
// LAUNCHERROR("kCalculateEwaldFastEikr");
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
// kCalculateEwaldFastStructureFactors_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastStructureFactors_kernel");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment