Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
72bd8a83
"platforms/cuda2/vscode:/vscode.git/clone" did not exist on "e2fc86abeb31b4f95a6473d5a681779466d24bc3"
Commit
72bd8a83
authored
Jul 29, 2010
by
Peter Eastman
Browse files
Further optimizations
parent
80d8311e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
35 additions
and
25 deletions
+35
-25
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+21
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+3
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+7
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
...platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
+0
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
...s/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
72bd8a83
...
...
@@ -943,6 +943,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
static
unsigned
int
threadsPerBlock
=
0
;
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaElectrostatic"
;
static
int
timestep
=
0
;
...
...
@@ -959,8 +961,6 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
// apparently debug array can take up nontrivial no. registers
#undef THREADS_PER_BLOCK
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
...
...
@@ -976,15 +976,28 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
unsigned
int
targetAtom
=
0
;
#endif
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
max
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
t
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
>>>
(
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
t
hreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
...
@@ -1003,12 +1016,12 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
t
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
>>>
(
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
t
hreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
72bd8a83
...
...
@@ -27,15 +27,13 @@
#include "amoebaScaleFactors.h"
__global__
/*
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(
GF1XX_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(
GT2XX_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
128
,
1
)
#else
__launch_bounds__(
G8X_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
64
,
1
)
#endif
*/
void
METHOD_NAME
(
kCalculateAmoebaCudaElectrostatic
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
72bd8a83
...
...
@@ -1958,13 +1958,13 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
// on first pass, set threads/block and based on that setting the energy buffer array
if
(
threadsPerBlock
==
0
){
#if (__CUDA_ARCH__ >= 200)
unsigned
int
maxThreads
=
256
;
#elif (__CUDA_ARCH__ >= 130)
unsigned
int
maxThreads
=
128
;
#else
unsigned
int
maxThreads
=
64
;
#endif
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
max
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
//unsigned int eDiffhreadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
//unsigned int maxThreadsPerBlock = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
View file @
72bd8a83
...
...
@@ -27,7 +27,6 @@
#include "amoebaScaleFactors.h"
__global__
/*
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
...
...
@@ -35,7 +34,6 @@ __launch_bounds__(128, 1)
#else
__launch_bounds__
(
64
,
1
)
#endif
*/
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwood
,
Forces_kernel
)(
unsigned
int
*
workUnit
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
View file @
72bd8a83
...
...
@@ -28,11 +28,11 @@
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
192
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
96
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
32
,
1
)
#endif
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwoodEDiff
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
View file @
72bd8a83
...
...
@@ -40,6 +40,7 @@ struct KirkwoodParticle {
float
dBornRadius
;
float
dBornRadiusPolar
;
float
padding
;
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment