Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
773dd0f0
"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "7f6c8bbc2d5c0da3ea7357f2f8077c8320ba8ae6"
Commit
773dd0f0
authored
Oct 04, 2010
by
Peter Eastman
Browse files
Optimizations to PME
parent
95f1884c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
36 deletions
+22
-36
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
...eba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
+22
-36
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPME.cu
View file @
773dd0f0
...
@@ -206,13 +206,7 @@ void kFindAmoebaAtomRangeForGrid_kernel()
...
@@ -206,13 +206,7 @@ void kFindAmoebaAtomRangeForGrid_kernel()
}
}
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
64
,
10
)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
#else
__launch_bounds__
(
192
,
1
)
#endif
void
kGridSpreadFixedMultipoles_kernel
()
void
kGridSpreadFixedMultipoles_kernel
()
{
{
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
...
@@ -303,13 +297,7 @@ void kGridSpreadFixedMultipoles_kernel()
...
@@ -303,13 +297,7 @@ void kGridSpreadFixedMultipoles_kernel()
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
64
,
10
)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
#else
__launch_bounds__
(
192
,
1
)
#endif
void
kGridSpreadInducedDipoles_kernel
()
void
kGridSpreadInducedDipoles_kernel
()
{
{
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
const
float
xscale
=
cSim
.
pmeGridSize
.
x
*
cSim
.
invPeriodicBoxSizeX
;
...
@@ -435,11 +423,11 @@ void kAmoebaReciprocalConvolution_kernel()
...
@@ -435,11 +423,11 @@ void kAmoebaReciprocalConvolution_kernel()
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
768
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
#el
se
#el
if (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
192
,
1
)
__launch_bounds__
(
192
,
1
)
#else
__launch_bounds__
(
96
,
1
)
#endif
#endif
void
kComputeFixedPotentialFromGrid_kernel
()
void
kComputeFixedPotentialFromGrid_kernel
()
{
{
...
@@ -551,11 +539,11 @@ void kComputeFixedPotentialFromGrid_kernel()
...
@@ -551,11 +539,11 @@ void kComputeFixedPotentialFromGrid_kernel()
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
768
,
1
)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 120)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__
(
192
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
kComputeInducedPotentialFromGrid_kernel
()
void
kComputeInducedPotentialFromGrid_kernel
()
{
{
...
@@ -978,15 +966,9 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
...
@@ -978,15 +966,9 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
{
{
// Compute B-spline coefficients and sort the atoms.
// Compute B-spline coefficients and sort the atoms.
int
threads
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
if
(
gpu
->
sm_version
>=
SM_20
)
int
bsplineThreads
=
(
gpu
->
sm_version
>=
SM_20
?
448
:
(
gpu
->
sm_version
>=
SM_12
?
160
:
160
));
threads
=
448
;
kComputeAmoebaBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
bsplineThreads
,
bsplineThreads
*
AMOEBA_PME_ORDER
*
AMOEBA_PME_ORDER
*
sizeof
(
float
)
>>>
();
else
if
(
gpu
->
sm_version
>=
SM_12
)
threads
=
160
;
else
threads
=
160
;
kComputeAmoebaBsplines_kernel
<<<
gpu
->
sim
.
blocks
,
threads
,
threads
*
AMOEBA_PME_ORDER
*
AMOEBA_PME_ORDER
*
sizeof
(
float
)
>>>
();
LAUNCHERROR
(
"kComputeAmoebaBsplines"
);
LAUNCHERROR
(
"kComputeAmoebaBsplines"
);
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
bbSort
(
gpu
->
psPmeAtomGridIndex
->
_pDevData
,
gpu
->
natoms
);
kFindAmoebaAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kFindAmoebaAtomRangeForGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
...
@@ -994,13 +976,14 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
...
@@ -994,13 +976,14 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
// Perform PME for the fixed multipoles.
// Perform PME for the fixed multipoles.
kGridSpreadFixedMultipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadFixedMultipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
384
:
(
gpu
->
sm_version
>=
SM_12
?
192
:
96
));
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
kRecordFixedMultipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psE_Field
->
_pDevData
);
kRecordFixedMultipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psE_Field
->
_pDevData
);
LAUNCHERROR
(
"kRecordFixedMultipoleField"
);
LAUNCHERROR
(
"kRecordFixedMultipoleField"
);
...
@@ -1014,13 +997,14 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
...
@@ -1014,13 +997,14 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
// Perform PME for the induced dipoles.
// Perform PME for the induced dipoles.
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kGridSpreadInducedDipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadInducedDipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
256
:
(
gpu
->
sm_version
>=
SM_12
?
128
:
64
));
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
kRecordInducedDipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
kRecordInducedDipoleField_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
(
amoebaGpu
->
psWorkVector
[
0
]
->
_pDevData
,
amoebaGpu
->
psWorkVector
[
1
]
->
_pDevData
);
LAUNCHERROR
(
"kRecordInducedDipoleField"
);
LAUNCHERROR
(
"kRecordInducedDipoleField"
);
...
@@ -1034,13 +1018,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
...
@@ -1034,13 +1018,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
// Perform PME for the fixed multipoles.
// Perform PME for the fixed multipoles.
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
kGridSpreadFixedMultipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadFixedMultipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
LAUNCHERROR
(
"kGridSpreadFixedMultipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
int
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
384
:
(
gpu
->
sm_version
>=
SM_12
?
192
:
96
));
kComputeFixedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeFixedPotentialFromGrid"
);
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeFixedMultipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeFixedMultipoleForceAndEnergy"
);
...
@@ -1048,13 +1033,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
...
@@ -1048,13 +1033,14 @@ void kCalculateAmoebaPME(amoebaGpuContext amoebaGpu)
// Perform PME for the induced dipoles.
// Perform PME for the induced dipoles.
kGridSpreadInducedDipoles_kernel
<<<
8
*
gpu
->
sim
.
blocks
,
64
>>>
();
kGridSpreadInducedDipoles_kernel
<<<
10
*
gpu
->
sim
.
blocks
,
64
>>>
();
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
LAUNCHERROR
(
"kGridSpreadInducedDipoles"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_FORWARD
);
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kAmoebaReciprocalConvolution_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
LAUNCHERROR
(
"kAmoebaReciprocalConvolution"
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
cufftExecC2C
(
gpu
->
fftplan
,
gpu
->
psPmeGrid
->
_pDevData
,
gpu
->
psPmeGrid
->
_pDevData
,
CUFFT_INVERSE
);
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
potentialThreads
=
(
gpu
->
sm_version
>=
SM_20
?
256
:
(
gpu
->
sm_version
>=
SM_12
?
128
:
64
));
kComputeInducedPotentialFromGrid_kernel
<<<
gpu
->
sim
.
blocks
,
potentialThreads
>>>
();
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
LAUNCHERROR
(
"kComputeInducedPotentialFromGrid"
);
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
kComputeInducedDipoleForceAndEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
LAUNCHERROR
(
"kComputeInducedDipoleForceAndEnergy"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment