Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f5ea8297
Commit
f5ea8297
authored
Mar 25, 2010
by
Peter Eastman
Browse files
Optimization (clear the force and Born sum buffers in a single kernel)
parent
2cb112f0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
93 deletions
+65
-93
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+10
-3
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+1
-1
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+0
-4
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
+0
-17
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+0
-21
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+54
-47
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
f5ea8297
...
@@ -47,7 +47,10 @@ void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context)
...
@@ -47,7 +47,10 @@ void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context)
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
computeForceCount
%
100
==
0
)
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
computeForceCount
%
100
==
0
)
gpuReorderAtoms
(
gpu
);
gpuReorderAtoms
(
gpu
);
data
.
computeForceCount
++
;
data
.
computeForceCount
++
;
kClearForces
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearBornSumAndForces
(
gpu
);
else
kClearForces
(
gpu
);
}
}
void
CudaCalcForcesAndEnergyKernel
::
finishForceComputation
(
ContextImpl
&
context
)
{
void
CudaCalcForcesAndEnergyKernel
::
finishForceComputation
(
ContextImpl
&
context
)
{
...
@@ -77,6 +80,8 @@ void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context)
...
@@ -77,6 +80,8 @@ void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context)
gpuReorderAtoms
(
gpu
);
gpuReorderAtoms
(
gpu
);
data
.
stepCount
++
;
data
.
stepCount
++
;
kClearEnergy
(
gpu
);
kClearEnergy
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearBornSumAndForces
(
gpu
);
}
}
double
CudaCalcForcesAndEnergyKernel
::
finishEnergyComputation
(
ContextImpl
&
context
)
{
double
CudaCalcForcesAndEnergyKernel
::
finishEnergyComputation
(
ContextImpl
&
context
)
{
...
@@ -790,8 +795,10 @@ void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& sys
...
@@ -790,8 +795,10 @@ void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& sys
gpuBuildExclusionList
(
gpu
);
gpuBuildExclusionList
(
gpu
);
gpuBuildOutputBuffers
(
gpu
);
gpuBuildOutputBuffers
(
gpu
);
gpuSetConstants
(
gpu
);
gpuSetConstants
(
gpu
);
kClearBornForces
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearForces
(
gpu
);
kClearBornSumAndForces
(
gpu
);
else
kClearForces
(
gpu
);
cudaThreadSynchronize
();
cudaThreadSynchronize
();
}
}
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
f5ea8297
...
@@ -29,7 +29,7 @@
...
@@ -29,7 +29,7 @@
// Initialization
// Initialization
extern
void
kClearForces
(
gpuContext
gpu
);
extern
void
kClearForces
(
gpuContext
gpu
);
extern
void
kClearEnergy
(
gpuContext
gpu
);
extern
void
kClearEnergy
(
gpuContext
gpu
);
extern
void
kClearBornForces
(
gpuContext
gpu
);
extern
void
kClearBorn
SumAnd
Forces
(
gpuContext
gpu
);
extern
void
kClearObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kClearObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornSum
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
f5ea8297
...
@@ -134,10 +134,6 @@ extern void kCalculatePME(gpuContext gpu);
...
@@ -134,10 +134,6 @@ extern void kCalculatePME(gpuContext gpu);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
{
// printf("kCalculateCDLJObcGbsaForces1\n");
// printf("kCalculateCDLJObcGbsaForces1\n");
// check if Born radii need to be calculated
kClearBornForces
(
gpu
);
switch
(
gpu
->
sim
.
nonbondedMethod
)
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
{
case
NO_CUTOFF
:
case
NO_CUTOFF
:
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
View file @
f5ea8297
...
@@ -102,21 +102,6 @@ void GetCalculateGBVIBornSumSim(gpuContext gpu)
...
@@ -102,21 +102,6 @@ void GetCalculateGBVIBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVIBornSum.h"
#include "kCalculateGBVIBornSum.h"
__global__
void
kClearGBVIBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearGBVIBornSum
(
gpuContext
gpu
)
{
kClearGBVIBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
}
__global__
void
kReduceGBVIBornSum_kernel
()
__global__
void
kReduceGBVIBornSum_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
@@ -177,8 +162,6 @@ void kReduceGBVIBornSum(gpuContext gpu)
...
@@ -177,8 +162,6 @@ void kReduceGBVIBornSum(gpuContext gpu)
void
kCalculateGBVIBornSum
(
gpuContext
gpu
)
void
kCalculateGBVIBornSum
(
gpuContext
gpu
)
{
{
//printf("kCalculateGBVIBornSum\n");
//printf("kCalculateGBVIBornSum\n");
kClearGBVIBornSum
(
gpu
);
LAUNCHERROR
(
"kClearBornSum"
);
//size_t numWithInteractions;
//size_t numWithInteractions;
switch
(
gpu
->
sim
.
nonbondedMethod
)
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
{
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
f5ea8297
...
@@ -97,19 +97,6 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
...
@@ -97,19 +97,6 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaBornSum.h"
#include "kCalculateObcGbsaBornSum.h"
__global__
__launch_bounds__
(
384
,
1
)
void
kClearObcGbsaBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
__global__
__global__
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
void
kReduceObcGbsaBornSum_kernel
()
void
kReduceObcGbsaBornSum_kernel
()
...
@@ -154,17 +141,9 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
...
@@ -154,17 +141,9 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
}
}
extern
void
kClearObcGbsaBornSum
(
gpuContext
gpu
)
{
// printf("kClearObcGbsaBornSum\n");
kClearObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
}
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
)
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
)
{
{
// printf("kCalculateObcgbsaBornSum\n");
// printf("kCalculateObcgbsaBornSum\n");
kClearObcGbsaBornSum
(
gpu
);
LAUNCHERROR
(
"kClearBornSum"
);
switch
(
gpu
->
sim
.
nonbondedMethod
)
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
{
case
NO_CUTOFF
:
case
NO_CUTOFF
:
...
...
platforms/cuda/src/kernels/kForces.cu
View file @
f5ea8297
...
@@ -53,8 +53,8 @@ void GetForcesSim(gpuContext gpu)
...
@@ -53,8 +53,8 @@ void GetForcesSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
}
}
__global__
__global__
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
void
kClearForces_kernel
()
void
kClearForces_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -72,27 +72,34 @@ void kClearForces(gpuContext gpu)
...
@@ -72,27 +72,34 @@ void kClearForces(gpuContext gpu)
LAUNCHERROR
(
"kClearForces"
);
LAUNCHERROR
(
"kClearForces"
);
}
}
__global__
__global__
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
void
kClearBornForces_kernel
()
void
kClearBorn
SumAnd
Forces_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
{
((
float
*
)
cSim
.
pBornForce
)[
pos
]
=
0.0
f
;
cSim
.
pBornSum
[
pos
]
=
0.0
f
;
cSim
.
pBornForce
[
pos
]
=
0.0
f
;
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
{
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
}
}
void
kClearBornForces
(
gpuContext
gpu
)
void
kClearBorn
SumAnd
Forces
(
gpuContext
gpu
)
{
{
// printf("kClearBornForces\n");
// printf("kClearBorn
SumAnd
Forces\n");
kClearBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kClearBorn
SumAnd
Forces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearBornForces"
);
LAUNCHERROR
(
"kClearBorn
SumAnd
Forces"
);
}
}
__global__
__global__
__launch_bounds__
(
384
,
1
)
__launch_bounds__
(
384
,
1
)
void
kClearEnergy_kernel
()
void
kClearEnergy_kernel
()
{
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -110,15 +117,15 @@ void kClearEnergy(gpuContext gpu)
...
@@ -110,15 +117,15 @@ void kClearEnergy(gpuContext gpu)
LAUNCHERROR
(
"kClearEnergy"
);
LAUNCHERROR
(
"kClearEnergy"
);
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
#endif
void
kReduceBornSumAndForces_kernel
()
void
kReduceBornSumAndForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
@@ -221,14 +228,14 @@ void kReduceBornSumAndForces(gpuContext gpu)
...
@@ -221,14 +228,14 @@ void kReduceBornSumAndForces(gpuContext gpu)
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
#endif
void
kReduceForces_kernel
()
void
kReduceForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
@@ -291,15 +298,15 @@ double kReduceEnergy(gpuContext gpu)
...
@@ -291,15 +298,15 @@ double kReduceEnergy(gpuContext gpu)
return
sum
;
return
sum
;
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#endif
void
kReduceObcGbsaBornForces_kernel
()
void
kReduceObcGbsaBornForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
float
energy
=
0.0
f
;
...
@@ -357,15 +364,15 @@ void kReduceObcGbsaBornForces_kernel()
...
@@ -357,15 +364,15 @@ void kReduceObcGbsaBornForces_kernel()
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
}
}
__global__
__global__
#if (__CUDA_ARCH__ >= 200)
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
#endif
void
kReduceGBVIBornForces_kernel
()
void
kReduceGBVIBornForces_kernel
()
{
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
float
energy
=
0.0
f
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment