Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
72bd8a83
Commit
72bd8a83
authored
Jul 29, 2010
by
Peter Eastman
Browse files
Further optimizations
parent
80d8311e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
35 additions
and
25 deletions
+35
-25
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+21
-8
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+3
-5
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+7
-7
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
...platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
+0
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
...s/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
72bd8a83
...
@@ -943,6 +943,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -943,6 +943,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
static
unsigned
int
threadsPerBlock
=
0
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
static
const
char
*
methodName
=
"cudaComputeAmoebaElectrostatic"
;
static
const
char
*
methodName
=
"cudaComputeAmoebaElectrostatic"
;
static
int
timestep
=
0
;
static
int
timestep
=
0
;
...
@@ -959,8 +961,6 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -959,8 +961,6 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
// apparently debug array can take up nontrivial no. registers
// apparently debug array can take up nontrivial no. registers
#undef THREADS_PER_BLOCK
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
if
(
amoebaGpu
->
log
){
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s %d maxCovalentDegreeSz=%d"
...
@@ -976,15 +976,28 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -976,15 +976,28 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
unsigned
int
targetAtom
=
0
;
unsigned
int
targetAtom
=
0
;
#endif
#endif
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
max
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
>>>
(
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
@@ -1003,12 +1016,12 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -1003,12 +1016,12 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaCudaElectrostaticN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
ElectrostaticParticle
),
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
#endif
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
amoebaGpu
->
nonbondElectrostaticThreadsPerBlock
>>>
(
kCalculateAmoebaCudaElectrostaticN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
ElectrostaticParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
72bd8a83
...
@@ -27,15 +27,13 @@
...
@@ -27,15 +27,13 @@
#include "amoebaScaleFactors.h"
#include "amoebaScaleFactors.h"
__global__
__global__
/*
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(
GF1XX_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(
GT2XX_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__(
G8X_NONBOND_THREADS_PER_BLOCK
, 1)
__launch_bounds__
(
64
,
1
)
#endif
#endif
*/
void
METHOD_NAME
(
kCalculateAmoebaCudaElectrostatic
,
Forces_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaCudaElectrostatic
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float4
*
atomCoord
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
72bd8a83
...
@@ -1958,13 +1958,13 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1958,13 +1958,13 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
// on first pass, set threads/block and based on that setting the energy buffer array
// on first pass, set threads/block and based on that setting the energy buffer array
if
(
threadsPerBlock
==
0
){
if
(
threadsPerBlock
==
0
){
#if (__CUDA_ARCH__ >= 200)
unsigned
int
maxThreads
;
unsigned
int
maxThreads
=
256
;
if
(
gpu
->
sm_version
>=
SM_20
)
#elif (__CUDA_ARCH__ >= 130)
maxThreads
=
256
;
unsigned
int
maxThreads
=
128
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
#else
maxThreads
=
128
;
unsigned
int
maxThreads
=
64
;
else
#endif
maxThreads
=
64
;
threadsPerBlock
=
std
::
max
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
max
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
//unsigned int eDiffhreadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
//unsigned int eDiffhreadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
//unsigned int maxThreadsPerBlock = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
//unsigned int maxThreadsPerBlock = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
View file @
72bd8a83
...
@@ -27,7 +27,6 @@
...
@@ -27,7 +27,6 @@
#include "amoebaScaleFactors.h"
#include "amoebaScaleFactors.h"
__global__
__global__
/*
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
256
,
1
)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
...
@@ -35,7 +34,6 @@ __launch_bounds__(128, 1)
...
@@ -35,7 +34,6 @@ __launch_bounds__(128, 1)
#else
#else
__launch_bounds__
(
64
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
*/
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwood
,
Forces_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwood
,
Forces_kernel
)(
unsigned
int
*
workUnit
unsigned
int
*
workUnit
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.h
View file @
72bd8a83
...
@@ -28,11 +28,11 @@
...
@@ -28,11 +28,11 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
192
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
96
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
32
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwoodEDiff
,
Forces_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaCudaKirkwoodEDiff
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodParticle.h
View file @
72bd8a83
...
@@ -40,6 +40,7 @@ struct KirkwoodParticle {
...
@@ -40,6 +40,7 @@ struct KirkwoodParticle {
float
dBornRadius
;
float
dBornRadius
;
float
dBornRadiusPolar
;
float
dBornRadiusPolar
;
float
padding
;
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment