Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
96f3680d
Commit
96f3680d
authored
Jun 30, 2009
by
Peter Eastman
Browse files
Fixed CUDA implementation of O(n^3/2) Ewald
parent
86a4baba
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
90 additions
and
222 deletions
+90
-222
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+3
-4
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+20
-23
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-1
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+61
-181
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+6
-13
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
96f3680d
...
...
@@ -300,7 +300,7 @@ struct cudaGmxSimulation {
float
cellVolume
;
// Ewald parameter alpha (a.k.a. kappa)
float
alphaEwald
;
// Ewald parameter alpha (a.k.a. kappa)
float
factorEwald
;
// - 1 ( 4 * alphaEwald * alphaEwald)
float
kmax
;
// Maximum number of reciprocal vectors
int
kmax
;
// Maximum number of reciprocal vectors
float
reactionFieldK
;
// Constant for reaction field correction
float
probeRadius
;
// SASA probe radius
float
surfaceAreaFactor
;
// ACE approximation surface area factor
...
...
@@ -337,9 +337,8 @@ struct cudaGmxSimulation {
float
collisionProbability
;
// Collision probability for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float2
*
pEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pStructureFactor
;
// Pointer to the structure factors (ewald)
float2
*
pCosSinSum
;
// Pointer to the cos/sin sums (ewald)
float2
*
pEwaldEikr
;
// Pointer to exponents of reciprocal vectors and atom coordinates (ewald)
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
float2
*
pBondParameter
;
// Bond parameters
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
96f3680d
...
...
@@ -429,18 +429,13 @@ void gpuSetEwaldParameters(gpuContext gpu)//, float alphaEwald, int kmax )
// hard coded alphaEwald and kmax, no interface yet
float
alpha
=
3.123413
f
;
float
PI
=
3.14159265358979323846
f
;
float
TWO_PI
=
2.0
f
*
PI
;
gpu
->
sim
.
recipBoxSizeX
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeX
;
gpu
->
sim
.
recipBoxSizeY
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeY
;
gpu
->
sim
.
recipBoxSizeZ
=
TWO_PI
/
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
sim
.
cellVolume
=
gpu
->
sim
.
periodicBoxSizeX
*
gpu
->
sim
.
periodicBoxSizeY
*
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
gpu
->
sim
.
alphaEwald
=
alpha
;
gpu
->
sim
.
factorEwald
=
-
1
/
(
4
*
alpha
*
alpha
);
gpu
->
sim
.
kmax
=
20
+
1
;
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
*
gpu
->
sim
.
kmax
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEwaldEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
((
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
)
*
(
gpu
->
sim
.
kmax
*
2
-
1
),
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pEwaldCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
}
extern
"C"
...
...
@@ -449,6 +444,10 @@ void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize
gpu
->
sim
.
periodicBoxSizeX
=
xsize
;
gpu
->
sim
.
periodicBoxSizeY
=
ysize
;
gpu
->
sim
.
periodicBoxSizeZ
=
zsize
;
gpu
->
sim
.
recipBoxSizeX
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeX
;
gpu
->
sim
.
recipBoxSizeY
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeY
;
gpu
->
sim
.
recipBoxSizeZ
=
2.0
f
*
PI
/
gpu
->
sim
.
periodicBoxSizeZ
;
gpu
->
sim
.
cellVolume
=
gpu
->
sim
.
periodicBoxSizeX
*
gpu
->
sim
.
periodicBoxSizeY
*
gpu
->
sim
.
periodicBoxSizeZ
;
}
extern
"C"
...
...
@@ -1002,12 +1001,6 @@ int gpuAllocateInitialBuffers(gpuContext gpu)
gpu
->
sim
.
pObcChain
=
gpu
->
psObcChain
->
_pDevStream
[
0
];
gpu
->
psSigEps2
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"SigEps2"
);
gpu
->
sim
.
pAttr
=
gpu
->
psSigEps2
->
_pDevStream
[
0
];
gpu
->
psEwaldEikr
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldEikr"
);
gpu
->
sim
.
pEikr
=
gpu
->
psEwaldEikr
->
_pDevStream
[
0
];
gpu
->
psEwaldStructureFactor
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldStructureFactor"
);
gpu
->
sim
.
pStructureFactor
=
gpu
->
psEwaldStructureFactor
->
_pDevStream
[
0
];
gpu
->
psEwaldCosSinSum
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"EwaldCosSinSum"
);
gpu
->
sim
.
pCosSinSum
=
gpu
->
psEwaldCosSinSum
->
_pDevStream
[
0
];
gpu
->
psObcData
=
new
CUDAStream
<
float2
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"ObcData"
);
gpu
->
sim
.
pObcData
=
gpu
->
psObcData
->
_pDevStream
[
0
];
gpu
->
psStepSize
=
new
CUDAStream
<
float2
>
(
1
,
1
,
"StepSize"
);
...
...
@@ -1281,6 +1274,8 @@ void* gpuInit(int numAtoms)
gpu
->
psRbDihedralParameter2
=
NULL
;
gpu
->
psLJ14ID
=
NULL
;
gpu
->
psLJ14Parameter
=
NULL
;
gpu
->
psEwaldEikr
=
NULL
;
gpu
->
psEwaldCosSinSum
=
NULL
;
gpu
->
psShakeID
=
NULL
;
gpu
->
psShakeParameter
=
NULL
;
gpu
->
psSettleID
=
NULL
;
...
...
@@ -1408,11 +1403,13 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psForce4
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psEwaldStructureFactor
;
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psObcData
;
delete
gpu
->
psSigEps2
;
if
(
gpu
->
psEwaldEikr
!=
NULL
)
{
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psEwaldCosSinSum
;
}
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornRadii
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
96f3680d
...
...
@@ -87,7 +87,6 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float2
>*
psEwaldEikr
;
CUDAStream
<
float2
>*
psEwaldStructureFactor
;
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
96f3680d
...
...
@@ -44,28 +44,10 @@
}
__device__
float2
FloatMultFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
x
;
b
.
y
=
r
*
a
.
y
;
return
b
;
}
__device__
float2
FloatMultConjFloat2
(
float
r
,
float2
a
)
{
float2
b
;
b
.
x
=
r
*
a
.
y
;
b
.
y
=
r
*
a
.
x
;
return
b
;
}
__global__
void
kCalculateEwaldFastEikr_kernel
()
{
int
kmax
=
cSim
.
kmax
;
int
kmax
=
cSim
.
kmax
;
float4
apos
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
...
...
@@ -81,23 +63,23 @@ __global__ void kCalculateEwaldFastEikr_kernel()
// k = 0, explicitly
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
}
// k = 1, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
]);
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
k
*
3
+
m
]
=
MultofFloat2
(
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
(
k
-
1
)
*
3
+
m
]
,
cSim
.
pE
waldE
ikr
[
atom
*
kmax
*
3
+
3
+
m
]);
}
}
...
...
@@ -105,176 +87,74 @@ __global__ void kCalculateEwaldFastEikr_kernel()
}
}
__global__
void
kCalculateEwaldFastStructureFactors_kernel
()
{
// hard-coded maximum k-vectors, no interface yet
int
kmax
=
cSim
.
kmax
;
float4
apos
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
float2
tab_xy
;
int
index
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
apos
=
cSim
.
pPosq
[
atom
];
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
else
{
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
}
else
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
cSim
.
pCosSinSum
[
index
].
x
=
0
.
0
;
cSim
.
pCosSinSum
[
index
].
y
=
0
.
0
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
{
// float2 eikr;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
unsigned
int
rx
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
rx
<
numRx
)
const
unsigned
int
ksize
=
2
*
cSim
.
kmax
-
1
;
const
unsigned
int
totalK
=
ksize
*
ksize
*
ksize
;
unsigned
int
index
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
index
<
totalK
)
{
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
// for(int rx = 0; rx < numRx; rx++) {
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
{
cSim
.
pCosSinSum
[
index
].
x
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
cSim
.
pCosSinSum
[
index
].
y
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
}
lowrz
=
1
-
numRz
;
int
rx
=
index
/
(
ksize
*
ksize
);
int
remainder
=
index
-
rx
*
ksize
*
ksize
;
int
ry
=
remainder
/
ksize
;
int
rz
=
remainder
-
ry
*
ksize
-
cSim
.
kmax
+
1
;
ry
+=
-
cSim
.
kmax
+
1
;
float2
sum
=
make_float2
(
0
.
0
f
,
0
.
0
f
);
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
{
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
])
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]));
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
])
:
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]));
sum
.
x
+=
charge
*
structureFactor
.
x
;
sum
.
y
+=
charge
*
structureFactor
.
y
;
}
lowry
=
1
-
numRy
;
}
rx
+=
blockDim
.
x
*
gridDim
.
x
;
cSim
.
pEwaldCosSinSum
[
index
]
=
sum
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastForces_kernel
()
{
float
PI
=
3
.
14159265358979323846
f
;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
float
PI
=
3
.
14159265358979323846
f
;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
cSim
.
epsfac
*
(
4
*
PI
/
cSim
.
cellVolume
/
epsilon
);
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
const
int
numRx
=
cSim
.
kmax
;
const
int
numRy
=
cSim
.
kmax
;
const
int
numRz
=
cSim
.
kmax
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
atom
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
atom
<
cSim
.
atoms
)
{
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
// next one is scary!
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
float
charge
=
cSim
.
pPosq
[
atom
].
w
;
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
float
kx
=
rx
*
cSim
.
recipBoxSizeX
;
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
float
ky
=
ry
*
cSim
.
recipBoxSizeY
;
float2
tab_xy
=
(
ry
>=
0
?
MultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
ry
*
3
+
1
])
:
ConjMultofFloat2
(
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rx
*
3
+
0
],
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
ry
*
3
+
1
]));
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
float
kz
=
rz
*
cSim
.
recipBoxSizeZ
;
int
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
float
k2
=
kx
*
kx
+
ky
*
ky
+
kz
*
kz
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float2
structureFactor
=
(
rz
>=
0
?
MultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
+
rz
*
3
+
2
])
:
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEwaldEikr
[
atom
*
cSim
.
kmax
*
3
-
rz
*
3
+
2
]));
float
dEdR
=
ak
*
charge
*
(
cSim
.
pEwaldCosSinSum
[
index
].
x
*
structureFactor
.
y
-
cSim
.
pEwaldCosSinSum
[
index
].
y
*
structureFactor
.
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
lowry
=
1
-
numRy
;
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
96f3680d
...
...
@@ -207,22 +207,15 @@ void kCalculateCDLJForces(gpuContext gpu)
}
else
{
// Vanilla (N2) Ewald
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
kCalculateCDLJEwaldReciprocalForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
// If using Fast Ewald, uncomment the lines below
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastEikr");
// kCalculateEwaldFastStructureFactors_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastStructureFactors_kernel");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
kCalculateEwaldFastEikr_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastEikr"
);
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment