Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5ac57f16
"SenseNova-SI-main/docs/vscode:/vscode.git/clone" did not exist on "876a36a4bf5038630179a3e0849332dae7449e45"
Commit
5ac57f16
authored
Aug 27, 2009
by
Peter Eastman
Browse files
Preliminary version of PME charge spreading kernel
parent
a4b2a13b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
4 deletions
+76
-4
platforms/cuda/src/kernels/kCalculatePME.cu
platforms/cuda/src/kernels/kCalculatePME.cu
+76
-4
No files found.
platforms/cuda/src/kernels/kCalculatePME.cu
View file @
5ac57f16
...
...
@@ -86,7 +86,7 @@ inline __host__ __device__ float4 make_float4(int3 a)
return
make_float4
(
a
.
x
,
a
.
y
,
a
.
z
,
0
);
}
__global__
void
kUpdateGridIndexAndFraction
()
__global__
void
kUpdateGridIndexAndFraction
_kernel
()
{
unsigned
int
tnb
=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -118,7 +118,7 @@ __global__ void kUpdateGridIndexAndFraction()
}
}
__global__
void
kUpdateBsplines
()
__global__
void
kUpdateBsplines
_kernel
()
{
unsigned
int
tnb
=
blockDim
.
x
*
gridDim
.
x
;
unsigned
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -193,11 +193,83 @@ __global__ void kUpdateBsplines()
}
}
/**
 * Accumulate atom contributions onto the PME grid and store the result in cSim.pPmeGrid.
 *
 * The grid is partitioned into groups of groupDim (4 x 4 x 2) points.  Each run of GRID
 * consecutive threads (a "warp" in this kernel's terms) is assigned a contiguous range of
 * groups, and each thread of that run handles one point of the current group.
 * NOTE(review): this assumes GRID == 4*4*2 == 32 so that every lane maps to exactly one
 * point of a group -- confirm against the definition of GRID.
 *
 * For every grid point the kernel loops over all atoms in blocks of GRID: the lanes
 * cooperatively stage cSim.pPosq and cSim.pPmeParticleIndex for the block in shared memory,
 * then every lane sums  posq.w * theta.x[ix] * theta.y[iy] * theta.z[iz]  over the atoms whose
 * (periodically wrapped) offset from the grid point is below PME_ORDER on all three axes.
 * The sum is scaled by sqrt(cSim.epsfac) and written as the real part of the grid element.
 *
 * Launch requirements: dynamic shared memory of at least
 * blockDim.x * (sizeof(float4) + sizeof(int4)) bytes, and blockDim.x a multiple of GRID.
 *
 * NOTE(review): the staging/consumption of shared memory has no explicit barrier; it relies
 * on the GRID lanes of a run executing in lockstep.  On hardware with independent thread
 * scheduling a warp-level barrier would be required -- confirm for the targeted architectures.
 */
__global__ void kGridSpreadCharge_kernel()
{
    // Shared memory layout: blockDim.x positions followed by blockDim.x grid indices.
    // The split point must be the number of threads in the block (which is what the arrays
    // are indexed by), not the number of atoms in the system.
    extern __shared__ float4 atomPos[];
    int4* atomGridIndex = (int4*) &atomPos[blockDim.x];

    // Derive the warp count from the actual launch configuration so that the group ranges
    // handed to each warp exactly tile [0, totalGroups).
    const unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
    const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    const int3 groupDim = make_int3(4, 4, 2);
    const int3 numGroups = make_int3((cSim.pmeGridSize.x+groupDim.x-1)/groupDim.x,
                                     (cSim.pmeGridSize.y+groupDim.y-1)/groupDim.y,
                                     (cSim.pmeGridSize.z+groupDim.z-1)/groupDim.z);
    const unsigned int totalGroups = numGroups.x*numGroups.y*numGroups.z;
    unsigned int group = warp*totalGroups/totalWarps;
    const unsigned int end = (warp+1)*totalGroups/totalWarps;
    const unsigned int index = threadIdx.x & (GRID-1);   // lane within the run of GRID threads
    const unsigned int warpBase = threadIdx.x-index;     // first shared-memory slot owned by this run

    while (group < end)
    {
        // Process a group of grid points of size groupDim.  First figure out the base index for the group,
        // and the index of the specific point this thread will handle.

        int3 gridBase;
        gridBase.x = group/(numGroups.y*numGroups.z);
        int remainder = group-gridBase.x*numGroups.y*numGroups.z;
        gridBase.y = remainder/numGroups.z;
        gridBase.z = remainder-gridBase.y*numGroups.z;
        gridBase.x *= groupDim.x;
        gridBase.y *= groupDim.y;
        gridBase.z *= groupDim.z;
        int3 gridPoint;
        gridPoint.x = index/(groupDim.y*groupDim.z);
        remainder = index-gridPoint.x*groupDim.y*groupDim.z;
        gridPoint.y = remainder/groupDim.z;
        gridPoint.z = remainder-gridPoint.y*groupDim.z;
        gridPoint.x += gridBase.x;
        gridPoint.y += gridBase.y;
        gridPoint.z += gridBase.z;

        // Groups on the upper faces of the grid may extend past it when the grid size is not a
        // multiple of groupDim.  Such lanes still help stage atoms below but must not store.
        const bool inGrid = (gridPoint.x < cSim.pmeGridSize.x &&
                             gridPoint.y < cSim.pmeGridSize.y &&
                             gridPoint.z < cSim.pmeGridSize.z);

        // Loop over blocks of atoms.

        float result = 0.0f;
        for (int atomBlock = 0; atomBlock < cSim.paddedNumberOfAtoms>>GRIDBITS; atomBlock++)
        {
            const int blockStart = atomBlock<<GRIDBITS;

            // Each lane stages one atom of the block into this run's slice of shared memory.
            int atomIndex = blockStart+index;
            if (atomIndex < cSim.atoms)
            {
                atomPos[threadIdx.x] = cSim.pPosq[atomIndex];
                atomGridIndex[threadIdx.x] = cSim.pPmeParticleIndex[atomIndex];
            }

            // Only the slots that were actually filled (real atoms) are consumed.
            int maxAtoms = min(GRID, cSim.atoms-blockStart);
            for (int i = 0; i < maxAtoms; i++)
            {
                const int atom = blockStart+i;
                const int4 atomGrid = atomGridIndex[warpBase+i];
                int ix = gridPoint.x-atomGrid.x;
                int iy = gridPoint.y-atomGrid.y;
                int iz = gridPoint.z-atomGrid.z;

                // Apply periodic wrapping to negative offsets.
                if (ix < 0)
                    ix += cSim.pmeGridSize.x;
                if (iy < 0)
                    iy += cSim.pmeGridSize.y;
                if (iz < 0)
                    iz += cSim.pmeGridSize.z;
                if (ix < PME_ORDER && iy < PME_ORDER && iz < PME_ORDER)
                    result += atomPos[warpBase+i].w*
                              cSim.pPmeBsplineTheta[atom*PME_ORDER+ix].x*
                              cSim.pPmeBsplineTheta[atom*PME_ORDER+iy].y*
                              cSim.pPmeBsplineTheta[atom*PME_ORDER+iz].z;
            }
        }

        // Store only for points that really lie on the grid: an out-of-range point would either
        // alias the linear index of a different grid cell or fall outside the grid array.
        if (inGrid)
        {
            unsigned int gridIndex = gridPoint.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z+gridPoint.y*cSim.pmeGridSize.z+gridPoint.z;
            cSim.pPmeGrid[gridIndex] = make_cuComplex(result*sqrt(cSim.epsfac), 0.0f);
        }
        group++;
    }
}
/**
 * Host-side driver for the PME grid setup.
 *
 * Launches, in order, the kernels that update per-atom grid indices/fractions, update the
 * B-spline coefficients, and accumulate the atom contributions onto the PME grid, then runs
 * an in-place forward complex-to-complex FFT on the grid (gpu->psPmeGrid->_pDevData).
 *
 * @param gpu  the context supplying launch dimensions (gpu->sim), the cuFFT plan and the grid buffer
 *
 * NOTE(review): the status returned by cufftExecC2C is not inspected here -- confirm whether
 * the caller is expected to detect FFT failures some other way.
 */
void kCalculatePME(gpuContext gpu)
{
//    printf("kCalculatePME\n");
    kUpdateGridIndexAndFraction_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
    LAUNCHERROR("kUpdateGridIndexAndFraction");

    // Dynamic shared memory: two float4 per thread for each of the PME_ORDER spline terms.
    kUpdateBsplines_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block, 2*gpu->sim.update_threads_per_block*PME_ORDER*sizeof(float4)>>>();
    LAUNCHERROR("kUpdateBsplines");

    // The spreading kernel stages one float4 and one int4 per thread in shared memory, so the
    // shared-memory size must be tied to the thread count used for the launch.
    const unsigned int spreadThreads = 64;
    kGridSpreadCharge_kernel<<<gpu->sim.blocks, spreadThreads, spreadThreads*(sizeof(float4)+sizeof(int4))>>>();
    LAUNCHERROR("kGridSpreadCharge");

    cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment