Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
9fdfaca9
Commit
9fdfaca9
authored
May 22, 2009
by
Rossen Apostolov
Browse files
More work on fast Ewald (N^3/2) in Cuda
parent
6fd7f3bb
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
101 additions
and
53 deletions
+101
-53
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+93
-47
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+8
-6
No files found.
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
9fdfaca9
...
@@ -73,25 +73,27 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -73,25 +73,27 @@ __global__ void kCalculateEwaldFastEikr_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
apos
=
cSim
.
pPosq
[
atom
];
//generic form of the array
//generic form of the array
// pEikr[ atomID*kmax*3 + k*3 + m]
// pEikr[ atomID*kmax*3 + k*3 + m]
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
// k = 0, explicitly
// k = 0, explicitly
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
}
// k = 1, explicitly
// k = 1, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
}
// k > 1, by recursion
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
...
@@ -103,13 +105,12 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -103,13 +105,12 @@ __global__ void kCalculateEwaldFastEikr_kernel()
}
}
}
}
__global__
void
kCalculateEwaldFast
CosSinSum
s_kernel
()
__global__
void
kCalculateEwaldFast
StructureFactor
s_kernel
()
{
{
// hard-coded maximum k-vectors, no interface yet
// hard-coded maximum k-vectors, no interface yet
int
kmax
=
cSim
.
kmax
;
int
kmax
=
cSim
.
kmax
;
// float2 eikr;
float4
apos
;
float4
apos
;
int
lowry
=
0
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
lowrz
=
1
;
...
@@ -125,64 +126,110 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
...
@@ -125,64 +126,110 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
apos
=
cSim
.
pPosq
[
atom
];
apos
=
cSim
.
pPosq
[
atom
];
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
else
{
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
}
else
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
),
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
cSim
.
pCosSinSum
[
index
].
x
=
0
.
0
;
cSim
.
pCosSinSum
[
index
].
y
=
0
.
0
;
lowrz
=
1
-
numRz
;
}
lowry
=
1
-
numRy
;
}
}
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
__global__
void
kCalculateEwaldFastCosSinSums_kernel
()
{
// float2 eikr;
int
lowry
=
0
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRy
=
20
+
1
;
int
numRz
=
20
+
1
;
unsigned
int
totalK
=
(
numRx
*
2
-
1
)
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
);
int
index
;
unsigned
int
rx
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
rx
<
numRx
)
{
// **********************************************************************
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
//
for(int rx = 0; rx < numRx; rx++) {
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
// tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
else
{
// tab_xy[n] = EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
// next one is scary!
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
for
(
int
atom
=
0
;
atom
<
cSim
.
atoms
;
atom
++
)
//tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * EIR(rz, n, 2));
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
cSim
.
pCosSinSum
[
index
].
x
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
}
cSim
.
pCosSinSum
[
index
].
y
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
}
else
{
// tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
cSim
.
pCosSinSum
[
index
].
x
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
cSim
.
pCosSinSum
[
index
].
y
+=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
lowrz
=
1
-
numRz
;
lowrz
=
1
-
numRz
;
}
}
lowry
=
1
-
numRy
;
lowry
=
1
-
numRy
;
}
}
rx
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
__global__
void
kCalculateEwaldFastForces_kernel
()
__global__
void
kCalculateEwaldFastForces_kernel
()
{
{
float
PI
=
3
.
14159265358979323846
f
;
float
PI
=
3
.
14159265358979323846
f
;
// hard-coded maximum k-vectors, no interface yet
// int kmax = cSim.kmax;
const
float
epsilon
=
1
.
0
;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
// float2 eikr;
// float4 apos;
int
lowry
=
0
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRx
=
20
+
1
;
...
@@ -195,10 +242,6 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -195,10 +242,6 @@ __global__ void kCalculateEwaldFastForces_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
// apos = cSim.pPosq[atom];
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
...
@@ -219,6 +262,7 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -219,6 +262,7 @@ __global__ void kCalculateEwaldFastForces_kernel()
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
...
@@ -228,7 +272,9 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -228,7 +272,9 @@ __global__ void kCalculateEwaldFastForces_kernel()
lowry
=
1
-
numRy
;
lowry
=
1
-
numRy
;
}
}
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
9fdfaca9
...
@@ -215,12 +215,14 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -215,12 +215,14 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
// If using Fast Ewald, uncomment the lines below
// If using Fast Ewald, uncomment the lines below
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastEikr");
// LAUNCHERROR("kCalculateEwaldFastEikr");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastStructureFactors_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// LAUNCHERROR("kCalculateEwaldFastStructureFactors_kernel");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment