Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c2361935
"docs/OpenMMUsersGuide.doc" did not exist on "b66c5425ebedd4eacc9130428a39df45e5dba18c"
Commit
c2361935
authored
Jul 30, 2010
by
Peter Eastman
Browse files
Further optimizations
parent
72bd8a83
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
31 additions
and
15 deletions
+31
-15
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
...cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
+19
-4
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
.../cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
...latforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
...rc/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersionParticle.h
...a/src/kernels/kCalculateAmoebaCudaWcaDispersionParticle.h
+1
-0
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
c2361935
...
@@ -986,7 +986,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
...
@@ -986,7 +986,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
m
ax
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
m
in
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
ElectrostaticParticle
)),
maxThreads
);
}
}
kClearFields_3
(
amoebaGpu
,
2
);
kClearFields_3
(
amoebaGpu
,
2
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.cu
View file @
c2361935
...
@@ -370,6 +370,8 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
...
@@ -370,6 +370,8 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
static
unsigned
int
threadsPerBlock
=
0
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
...
@@ -393,11 +395,24 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
...
@@ -393,11 +395,24 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
#endif
#endif
// on first pass, set threads/block
if
(
threadsPerBlock
==
0
){
unsigned
int
maxThreads
;
if
(
gpu
->
sm_version
>=
SM_20
)
maxThreads
=
256
;
else
if
(
gpu
->
sm_version
>=
SM_12
)
maxThreads
=
128
;
else
maxThreads
=
64
;
threadsPerBlock
=
std
::
min
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
FixedFieldParticle
)),
maxThreads
);
}
kClearFields_3
(
amoebaGpu
,
3
);
kClearFields_3
(
amoebaGpu
,
3
);
if
(
gpu
->
bOutputBufferPerWarp
){
if
(
gpu
->
bOutputBufferPerWarp
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 warp
\n
"
);
kCalculateAmoebaFixedEAndGkFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
>>>
(
kCalculateAmoebaFixedEAndGkFieldN2ByWarp_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
t
hreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
@@ -416,12 +431,12 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
...
@@ -416,12 +431,12 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 no warp
\n
"
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u
\n
"
,
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
amoebaGpu
->
bOutputBufferPerWarp
,
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
sizeof
(
FixedFieldParticle
),
sizeof
(
FixedFieldParticle
)
*
t
hreadsPerBlock
,
amoebaGpu
->
energyOutputBuffers
,
(
*
gpu
->
psInteractionCount
)[
0
],
gpu
->
sim
.
workUnits
);
(
void
)
fflush
(
amoebaGpu
->
log
);
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
#endif
kCalculateAmoebaFixedEAndGkFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondElectrostaticT
hreadsPerBlock
>>>
(
kCalculateAmoebaFixedEAndGkFieldN2_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
t
hreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
t
hreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
View file @
c2361935
...
@@ -28,11 +28,11 @@
...
@@ -28,11 +28,11 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaFixedEAndGkField
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaFixedEAndGkField
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
View file @
c2361935
...
@@ -1965,7 +1965,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
...
@@ -1965,7 +1965,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
maxThreads
=
128
;
maxThreads
=
128
;
else
else
maxThreads
=
64
;
maxThreads
=
64
;
threadsPerBlock
=
std
::
m
ax
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
threadsPerBlock
=
std
::
m
in
(
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
KirkwoodParticle
)),
maxThreads
);
//unsigned int eDiffhreadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
//unsigned int eDiffhreadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
//unsigned int maxThreadsPerBlock = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
//unsigned int maxThreadsPerBlock = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.h
View file @
c2361935
...
@@ -28,11 +28,11 @@
...
@@ -28,11 +28,11 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
256
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
128
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedAndGkFields
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaMutualInducedAndGkFields
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersion.h
View file @
c2361935
...
@@ -26,11 +26,11 @@
...
@@ -26,11 +26,11 @@
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
384
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
192
,
1
)
#else
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
64
,
1
)
#endif
#endif
void
METHOD_NAME
(
kCalculateAmoebaWcaDispersion
,
_kernel
)(
void
METHOD_NAME
(
kCalculateAmoebaWcaDispersion
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
*
workUnit
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaWcaDispersionParticle.h
View file @
c2361935
...
@@ -15,6 +15,7 @@ struct WcaDispersionParticle {
...
@@ -15,6 +15,7 @@ struct WcaDispersionParticle {
float
epsilon
;
float
epsilon
;
float
force
[
3
];
float
force
[
3
];
float
padding
;
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment