Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
773dd0f0
Commit
773dd0f0
authored
Oct 04, 2010
by
Peter Eastman
Browse files
Optimizations to PME
parent
95f1884c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
36 deletions
+22
-36
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+22
-36
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
773dd0f0
...
@@ -206,13 +206,7 @@ void kFindAmoebaAtomRangeForGrid_kernel()
...
@@ -206,13 +206,7 @@ void kFindAmoebaAtomRangeForGrid_kernel()
}
}
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
64
,
10
)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
#else
__launch_bounds__
(
192
,
1
)
#endif
void
kGridSpreadFixedMultipoles_kernel
()
void
kGridSpreadFixedMultipoles_kernel
()
{
{
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
...
@@ -303,13 +297,7 @@ void kGridSpreadFixedMultipoles_kernel()
...
@@ -303,13 +297,7 @@ void kGridSpreadFixedMultipoles_kernel()
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
64
,
10
)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
#else
__launch_bounds__
(
192
,
1
)
#endif
void
kGridSpreadInducedDipoles_kernel
()
void
kGridSpreadInducedDipoles_kernel
()
{
{
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
...
@@ -435,11 +423,11 @@ void kAmoebaReciprocalConvolution_kernel()
...
@@ -435,11 +423,11 @@ void kAmoebaReciprocalConvolution_kernel()
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
#el
se
#el
if (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
192
,
1
)
__launch_bounds__
(
192
,
1
)
#else
__launch_bounds__
(
96
,
1
)
#endif
#endif
void
kComputeFixedPotentialFromGrid_kernel
()
void
kComputeFixedPotentialFromGrid_kernel
()
{
{
...
@@ -551,11 +539,11 @@ void kComputeFixedPotentialFromGrid_kernel()
...
@@ -551,11 +539,11 @@ void kComputeFixedPotentialFromGrid_kernel()
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
768
,
1
)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 120)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__
(
192
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
kComputeInducedPotentialFromGrid_kernel
()
void
kComputeInducedPotentialFromGrid_kernel
()
{
{
...
@@ -978,15 +966,9 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
...
@@ -978,15 +966,9 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
{
{
// Compute B-spline coefficients and sort the atoms.
// Compute B-spline coefficients and sort the atoms.
int
threads
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
if
(
gpu
->
sm_version
>=
SM_20
)
int
bsplineThreads
=
(
gpu
->
sm_version
>=
SM_20
?
448
:
(
gpu
->
sm_version
>=
SM_12
?
160
:
160
));
threads
=
448
;
kComputeAmoebaBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
bsplineThreads
,
bsplineThreads
*
AMOEBA_PME_ORDER
*
AMOEBA_PME_ORDER
*
sizeof
(
float
)
>>>
();
else
if
(
gpu
->
sm_version
>=
SM_12
)
threads
=
160
;
else
threads
=
160
;
kComputeAmoebaBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
threads
*
AMOEBA_PME_ORDER
*
AMOEBA_PME_ORDER
*
sizeof
(
float
)
>>>
();
LAUNCHERROR
(
"kComputeAmoebaBsplines"
);
LAUNCHERROR
(
"kComputeAmoebaBsplines"
);
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
kFindAmoebaAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kFindAmoebaAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
...
@@ -994,13 +976,14 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
...
@@ -994,13 +976,14 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
// Perform PME for the fixed multipoles.
// Perform PME for the fixed multipoles.
kGridSpreadFixedMultipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadFixedMultipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
384
:
(
gpu
->
sm_version
>=
SM_12
?
192
:
96
));
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
kRecordFixedMultipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psE_Field
->
_pDevData
);
kRecordFixedMultipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psE_Field
->
_pDevData
);
LAUNCHERROR
(
"kRecordFixedMultipoleField"
);
LAUNCHERROR
(
"kRecordFixedMultipoleField"
);
...
@@ -1014,13 +997,14 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
...
@@ -1014,13 +997,14 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
// Perform PME for the induced dipoles.
// Perform PME for the induced dipoles.
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kGridSpreadInducedDipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadInducedDipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
256
:
(
gpu
->
sm_version
>=
SM_12
?
128
:
64
));
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
kRecordInducedDipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
kRecordInducedDipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"kRecordInducedDipoleField"
);
LAUNCHERROR
(
"kRecordInducedDipoleField"
);
...
@@ -1034,13 +1018,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
...
@@ -1034,13 +1018,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
// Perform PME for the fixed multipoles.
// Perform PME for the fixed multipoles.
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kGridSpreadFixedMultipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadFixedMultipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
384
:
(
gpu
->
sm_version
>=
SM_12
?
192
:
96
));
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
...
@@ -1048,13 +1033,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
...
@@ -1048,13 +1033,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
// Perform PME for the induced dipoles.
// Perform PME for the induced dipoles.
kGridSpreadInducedDipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadInducedDipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
256
:
(
gpu
->
sm_version
>=
SM_12
?
128
:
64
));
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment