Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
9fdfaca9
"platforms/brook/src/OpenMMBrookInterface.cpp" did not exist on "baed0187b3659b2eeae38c0dcdf202b99d1713b1"
Commit
9fdfaca9
authored
May 22, 2009
by
Rossen Apostolov
Browse files
More work on fast Ewald (N^3/2) in Cuda
parent
6fd7f3bb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
101 additions
and
53 deletions
+101
-53
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
...orms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
+93
-47
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+8
-6
No files found.
platforms/cuda/src/kernels/kCalculateCDLJEwaldFastReciprocal.h
View file @
9fdfaca9
...
@@ -73,25 +73,27 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -73,25 +73,27 @@ __global__ void kCalculateEwaldFastEikr_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
apos
=
cSim
.
pPosq
[
atom
];
//generic form of the array
//generic form of the array
// pEikr[ atomID*kmax*3 + k*3 + m]
// pEikr[ atomID*kmax*3 + k*3 + m]
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
// k = 0, explicitly
// k = 0, explicitly
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
x
=
1
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
cSim
.
pEikr
[
atom
*
kmax
*
3
+
0
+
m
].
y
=
0
;
}
// k = 1, explicitly
// k = 1, explicitly
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
x
=
cos
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
m
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
0
].
y
=
sin
(
apos
.
x
*
cSim
.
recipBoxSizeX
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
x
=
cos
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
4
+
m
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
1
].
y
=
sin
(
apos
.
y
*
cSim
.
recipBoxSizeY
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
x
=
cos
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
5
+
m
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
cSim
.
pEikr
[
atom
*
kmax
*
3
+
3
+
2
].
y
=
sin
(
apos
.
z
*
cSim
.
recipBoxSizeZ
);
}
// k > 1, by recursion
// k > 1, by recursion
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
k
=
2
;
(
k
<
kmax
);
k
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
for
(
unsigned
int
m
=
0
;
(
m
<
3
);
m
++
)
{
...
@@ -103,13 +105,12 @@ __global__ void kCalculateEwaldFastEikr_kernel()
...
@@ -103,13 +105,12 @@ __global__ void kCalculateEwaldFastEikr_kernel()
}
}
}
}
__global__
void
kCalculateEwaldFast
CosSinSum
s_kernel
()
__global__
void
kCalculateEwaldFast
StructureFactor
s_kernel
()
{
{
// hard-coded maximum k-vectors, no interface yet
// hard-coded maximum k-vectors, no interface yet
int
kmax
=
cSim
.
kmax
;
int
kmax
=
cSim
.
kmax
;
// float2 eikr;
float4
apos
;
float4
apos
;
int
lowry
=
0
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
lowrz
=
1
;
...
@@ -125,64 +126,110 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
...
@@ -125,64 +126,110 @@ __global__ void kCalculateEwaldFastCosSinSums_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
apos
=
cSim
.
pPosq
[
atom
];
apos
=
cSim
.
pPosq
[
atom
];
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
for
(
int
ry
=
lowry
;
ry
<
numRy
;
ry
++
)
{
if
(
ry
>=
0
)
{
if
(
ry
>=
0
)
//
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
{
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
tab_xy
=
MultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
ry
*
3
+
1
]);
}
}
else
{
else
//
tab_xy[n] = EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
{
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
tab_xy
=
ConjMultofFloat2
(
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rx
*
3
+
0
]
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
ry
*
3
+
1
]);
}
}
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
for
(
int
rz
=
lowrz
;
rz
<
numRz
;
rz
++
)
{
// next one is scary!
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
index
=
rx
*
(
numRy
*
2
-
1
)
*
(
numRz
*
2
-
1
)
+
(
ry
+
numRy
-
1
)
*
(
numRz
*
2
-
1
)
+
(
rz
+
numRz
-
1
);
if
(
rz
>=
0
)
{
if
(
rz
>=
0
)
//tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * EIR(rz, n, 2));
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
)
,
MultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
+
rz
*
3
+
2
]
));
}
}
else
{
else
// tab_qxyz[n] = atomParameters[n][QIndex] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
{
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
apos
.
w
,
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
]
=
FloatMultFloat2
(
(
apos
.
w
)
,
ConjMultofFloat2
(
tab_xy
,
cSim
.
pEikr
[
atom
*
kmax
*
3
-
rz
*
3
+
2
]
));
}
}
cSim
.
pCosSinSum
[
index
].
x
+
=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
;
cSim
.
pCosSinSum
[
index
].
x
=
0
.
0
;
cSim
.
pCosSinSum
[
index
].
y
+
=
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
;
cSim
.
pCosSinSum
[
index
].
y
=
0
.
0
;
lowrz
=
1
-
numRz
;
lowrz
=
1
-
numRz
;
}
}
lowry
=
1
-
numRy
;
lowry
=
1
-
numRy
;
}
}
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
/**
 * Sums the per-atom structure factors into one total per reciprocal vector.
 *
 * For every (rx, ry, rz) visited, adds
 *     cSim.pStructureFactor[atom * totalK + index]   for atom in [0, cSim.atoms)
 * into cSim.pCosSinSum[index], component-wise (.x and .y).
 *
 * Work distribution: each thread starts at rx = threadIdx.x + blockIdx.x *
 * blockDim.x and advances by blockDim.x * gridDim.x, so every rx value (and
 * therefore every `index` derived from it) is handled by exactly one thread.
 *
 * The sums are accumulated with +=; this kernel does not clear
 * cSim.pCosSinSum itself, so the entries must hold their starting value
 * before launch.
 *
 * Visited set of vectors (same set the serial rx/ry/rz walk in the per-atom
 * kernels of this file produces when it starts from rx = 0):
 *   - rx == 0, ry == 0 : rz in [1, numRz)
 *   - rx == 0, ry  > 0 : rz in [1 - numRz, numRz)
 *   - rx  > 0          : ry in [1 - numRy, numRy), rz in [1 - numRz, numRz)
 *
 * The lower bounds are computed from (rx, ry) rather than carried in mutable
 * lowry/lowrz variables: with rx distributed over threads, a thread whose
 * first rx is non-zero would otherwise start from the rx == 0 bounds and
 * silently skip the ry < 0 (and ry == 0, rz <= 0) entries for that rx.
 */
__global__ void kCalculateEwaldFastCosSinSums_kernel()
{
    // Hard-coded k-vector extents (20 + 1 per axis), matching the literals
    // used by the other fast-Ewald kernels in this file.
    const int numRx = 20 + 1;
    const int numRy = 20 + 1;
    const int numRz = 20 + 1;

    // Per-atom stride inside pStructureFactor.
    const unsigned int totalK = (numRx * 2 - 1) * (numRy * 2 - 1) * (numRz * 2 - 1);

    unsigned int rx = threadIdx.x + blockIdx.x * blockDim.x;

    while (rx < numRx)
    {
        // Only the rx == 0 plane is restricted to ry >= 0.
        const int lowry = (rx == 0) ? 0 : 1 - numRy;

        for (int ry = lowry; ry < numRy; ry++)
        {
            // Only the rx == 0, ry == 0 line is restricted to rz >= 1.
            const int lowrz = (rx == 0 && ry == 0) ? 1 : 1 - numRz;

            for (int rz = lowrz; rz < numRz; rz++)
            {
                // Flattened (rx, ry, rz) position; ry and rz are shifted by
                // (numR? - 1) so that negative components map to >= 0.
                const int index = rx * (numRy * 2 - 1) * (numRz * 2 - 1)
                                + (ry + numRy - 1) * (numRz * 2 - 1)
                                + (rz + numRz - 1);

                for (int atom = 0; atom < cSim.atoms; atom++)
                {
                    cSim.pCosSinSum[index].x += cSim.pStructureFactor[atom * totalK + index].x;
                    cSim.pCosSinSum[index].y += cSim.pStructureFactor[atom * totalK + index].y;
                }
            }
        }

        rx += blockDim.x * gridDim.x;
    }
}
__global__
void
kCalculateEwaldFastForces_kernel
()
__global__
void
kCalculateEwaldFastForces_kernel
()
{
{
float
PI
=
3
.
14159265358979323846
f
;
float
PI
=
3
.
14159265358979323846
f
;
// hard-coded maximum k-vectors, no interface yet
// int kmax = cSim.kmax;
const
float
epsilon
=
1
.
0
;
const
float
epsilon
=
1
.
0
;
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
float
recipCoeff
=
(
4
*
PI
/
cSim
.
V
/
epsilon
);
// float2 eikr;
// float4 apos;
int
lowry
=
0
;
int
lowry
=
0
;
int
lowrz
=
1
;
int
lowrz
=
1
;
int
numRx
=
20
+
1
;
int
numRx
=
20
+
1
;
...
@@ -195,10 +242,6 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -195,10 +242,6 @@ __global__ void kCalculateEwaldFastForces_kernel()
while
(
atom
<
cSim
.
atoms
)
while
(
atom
<
cSim
.
atoms
)
{
{
// apos = cSim.pPosq[atom];
// **********************************************************************
// cSim.pEikr[atom*kmax*3 + k*3 + m]
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
for
(
int
rx
=
0
;
rx
<
numRx
;
rx
++
)
{
...
@@ -219,6 +262,7 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -219,6 +262,7 @@ __global__ void kCalculateEwaldFastForces_kernel()
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
ak
=
exp
(
k2
*
cSim
.
factorEwald
)
/
k2
;
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
float
dEdR
=
ak
*
(
cSim
.
pCosSinSum
[
index
].
x
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
y
-
cSim
.
pCosSinSum
[
index
].
y
*
cSim
.
pStructureFactor
[
atom
*
totalK
+
index
].
x
);
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
x
+=
2
*
recipCoeff
*
dEdR
*
kx
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
y
+=
2
*
recipCoeff
*
dEdR
*
ky
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
cSim
.
pForce4
[
atom
].
z
+=
2
*
recipCoeff
*
dEdR
*
kz
;
...
@@ -228,7 +272,9 @@ __global__ void kCalculateEwaldFastForces_kernel()
...
@@ -228,7 +272,9 @@ __global__ void kCalculateEwaldFastForces_kernel()
lowry
=
1
-
numRy
;
lowry
=
1
-
numRy
;
}
}
}
}
// **********************************************************************
atom
+=
blockDim
.
x
*
gridDim
.
x
;
atom
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
9fdfaca9
...
@@ -215,12 +215,14 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -215,12 +215,14 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldReciprocalForces"
);
// If using Fast Ewald, uncomment the lines below
// If using Fast Ewald, uncomment the lines below
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastEikr_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastEikr");
// LAUNCHERROR("kCalculateEwaldFastEikr");
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastStructureFactors_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// LAUNCHERROR("kCalculateEwaldFastStructureFactors_kernel");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
// LAUNCHERROR("kCalculateEwaldFastCosSinSums");
// kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
// LAUNCHERROR("kCalculateEwaldFastForces");
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment