Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f5ea8297
"platforms/cuda/tests/TestCudaCheckpoints.cpp" did not exist on "17ae3aaeaa7c0518f3ec4a546f0f718f07dd06b3"
Commit
f5ea8297
authored
Mar 25, 2010
by
Peter Eastman
Browse files
Optimization (clear the force and Born sum buffers in a single kernel)
parent
2cb112f0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
93 deletions
+65
-93
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+10
-3
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+1
-1
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+0
-4
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
+0
-17
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+0
-21
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+54
-47
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
f5ea8297
...
...
@@ -47,7 +47,10 @@ void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context)
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
computeForceCount
%
100
==
0
)
gpuReorderAtoms
(
gpu
);
data
.
computeForceCount
++
;
kClearForces
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearBornSumAndForces
(
gpu
);
else
kClearForces
(
gpu
);
}
void
CudaCalcForcesAndEnergyKernel
::
finishForceComputation
(
ContextImpl
&
context
)
{
...
...
@@ -77,6 +80,8 @@ void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context)
gpuReorderAtoms
(
gpu
);
data
.
stepCount
++
;
kClearEnergy
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearBornSumAndForces
(
gpu
);
}
double
CudaCalcForcesAndEnergyKernel
::
finishEnergyComputation
(
ContextImpl
&
context
)
{
...
...
@@ -790,8 +795,10 @@ void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& sys
gpuBuildExclusionList
(
gpu
);
gpuBuildOutputBuffers
(
gpu
);
gpuSetConstants
(
gpu
);
kClearBornForces
(
gpu
);
kClearForces
(
gpu
);
if
(
gpu
->
bIncludeGBSA
||
gpu
->
bIncludeGBVI
)
kClearBornSumAndForces
(
gpu
);
else
kClearForces
(
gpu
);
cudaThreadSynchronize
();
}
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
f5ea8297
...
...
@@ -29,7 +29,7 @@
// Initialization
extern
void
kClearForces
(
gpuContext
gpu
);
extern
void
kClearEnergy
(
gpuContext
gpu
);
extern
void
kClearBornForces
(
gpuContext
gpu
);
extern
void
kClearBornSumAndForces
(
gpuContext
gpu
);
extern
void
kClearObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
);
extern
void
kReduceObcGbsaBornSum
(
gpuContext
gpu
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
f5ea8297
...
...
@@ -134,10 +134,6 @@ extern void kCalculatePME(gpuContext gpu);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
// check if Born radii need to be calculated
kClearBornForces
(
gpu
);
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
View file @
f5ea8297
...
...
@@ -102,21 +102,6 @@ void GetCalculateGBVIBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVIBornSum.h"
__global__
void
kClearGBVIBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearGBVIBornSum
(
gpuContext
gpu
)
{
kClearGBVIBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
}
__global__
void
kReduceGBVIBornSum_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -177,8 +162,6 @@ void kReduceGBVIBornSum(gpuContext gpu)
void
kCalculateGBVIBornSum
(
gpuContext
gpu
)
{
//printf("kCalculateGBVIBornSum\n");
kClearGBVIBornSum
(
gpu
);
LAUNCHERROR
(
"kClearBornSum"
);
//size_t numWithInteractions;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
f5ea8297
...
...
@@ -97,19 +97,6 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaBornSum.h"
__global__
__launch_bounds__
(
384
,
1
)
void
kClearObcGbsaBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornSum
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
__global__
__launch_bounds__
(
384
,
1
)
void
kReduceObcGbsaBornSum_kernel
()
...
...
@@ -154,17 +141,9 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
}
extern
void
kClearObcGbsaBornSum
(
gpuContext
gpu
)
{
// printf("kClearObcGbsaBornSum\n");
kClearObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
}
void
kCalculateObcGbsaBornSum
(
gpuContext
gpu
)
{
// printf("kCalculateObcgbsaBornSum\n");
kClearObcGbsaBornSum
(
gpu
);
LAUNCHERROR
(
"kClearBornSum"
);
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
...
...
platforms/cuda/src/kernels/kForces.cu
View file @
f5ea8297
...
...
@@ -53,8 +53,8 @@ void GetForcesSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
}
__global__
__launch_bounds__
(
384
,
1
)
__global__
__launch_bounds__
(
384
,
1
)
void
kClearForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -72,27 +72,34 @@ void kClearForces(gpuContext gpu)
LAUNCHERROR
(
"kClearForces"
);
}
__global__
__launch_bounds__
(
384
,
1
)
void
kClearBornForces_kernel
()
__global__
__launch_bounds__
(
384
,
1
)
void
kClearBornSumAndForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
((
float
*
)
cSim
.
pBornForce
)[
pos
]
=
0.0
f
;
cSim
.
pBornSum
[
pos
]
=
0.0
f
;
cSim
.
pBornForce
[
pos
]
=
0.0
f
;
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
{
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearBornForces
(
gpuContext
gpu
)
void
kClearBornSumAndForces
(
gpuContext
gpu
)
{
// printf("kClearBornForces\n");
kClearBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearBornForces"
);
// printf("kClearBornSumAndForces\n");
kClearBornSumAndForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearBornSumAndForces"
);
}
__global__
__launch_bounds__
(
384
,
1
)
__global__
__launch_bounds__
(
384
,
1
)
void
kClearEnergy_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
@@ -110,15 +117,15 @@ void kClearEnergy(gpuContext gpu)
LAUNCHERROR
(
"kClearEnergy"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceBornSumAndForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceBornSumAndForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -221,14 +228,14 @@ void kReduceBornSumAndForces(gpuContext gpu)
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -291,15 +298,15 @@ double kReduceEnergy(gpuContext gpu)
return
sum
;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
...
...
@@ -357,15 +364,15 @@ void kReduceObcGbsaBornForces_kernel()
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment