Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
77742df8
Commit
77742df8
authored
Dec 12, 2011
by
Mark Friedrichs
Browse files
Modified threadsPerBlock for several kernels and added launch_bounds
parent
dacebfd3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
61 additions
and
13 deletions
+61
-13
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
...atforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
+21
-5
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
...orms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
+40
-8
No files found.
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
View file @
77742df8
...
...
@@ -162,7 +162,15 @@ struct Atom {
float
bornRadiusScaleFactor
;
};
__global__
void
kClearGBVISoftcoreBornSum_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearGBVISoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
...
...
@@ -173,7 +181,7 @@ __global__ void kClearGBVISoftcoreBornSum_kernel()
}
void
kClearGBVISoftcoreBornSum
(
gpuContext
gpu
)
{
kClearGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kClearGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
}
__global__
...
...
@@ -284,7 +292,7 @@ void kReduceGBVISoftcoreBornSum_kernel()
void
kReduceGBVISoftcoreBornSum
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kReduceGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kReduceGBVISoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVISoftcoreBornSum"
);
}
...
...
@@ -333,7 +341,15 @@ void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
*
outDerivative
=
-
30.0
f
*
ratio2
*
(
1.0
f
+
ratio
*
(
ratio
-
2.0
f
))
/
denominator
;
}
__global__
void
kReduceGBVIBornSumQuinticScaling_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornSumQuinticScaling_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -380,7 +396,7 @@ __global__ void kReduceGBVIBornSumQuinticScaling_kernel()
void
kReduceGBVIBornSumQuinticScaling
(
freeEnergyGpuContext
freeEnergyGpu
)
{
gpuContext
gpu
=
freeEnergyGpu
->
gpuContext
;
kReduceGBVIBornSumQuinticScaling_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kReduceGBVIBornSumQuinticScaling_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVIBornSumQuinticScaling_kernel"
);
}
...
...
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
View file @
77742df8
...
...
@@ -58,7 +58,15 @@ extern "C" void SetCalculateObcGbsaSoftcoreBornSumSim( freeEnergyGpuContext free
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreBornSumSim copy to gbsaSimDev failed."
);
}
__global__
void
kClearObcGbsaSoftcoreBornSum_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearObcGbsaSoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
...
...
@@ -68,7 +76,15 @@ __global__ void kClearObcGbsaSoftcoreBornSum_kernel()
}
}
__global__
void
kClearSoftcoreBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kClearSoftcoreBornForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
...
...
@@ -81,17 +97,25 @@ __global__ void kClearSoftcoreBornForces_kernel()
void
kClearSoftcoreBornForces
(
gpuContext
gpu
)
{
// printf("kClearSoftcoreBornForces\n");
kClearSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kClearSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kClearSoftcoreBornForces"
);
}
void
kClearObcGbsaSoftcoreBornSum
(
gpuContext
gpu
)
{
// printf("kClearObcGbsaBornSum\n");
kClearObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kClearObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
}
__global__
void
kReduceObcGbsaSoftcoreBornForces_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaSoftcoreBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
...
...
@@ -153,7 +177,7 @@ __global__ void kReduceObcGbsaSoftcoreBornForces_kernel()
void
kReduceObcGbsaSoftcoreBornForces
(
gpuContext
gpu
){
kReduceObcGbsaSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_
threads_per_block
>>>
();
kReduceObcGbsaSoftcoreBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaSoftcoreBornForces"
);
}
...
...
@@ -191,7 +215,15 @@ void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu ){
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaSoftcoreBornSum.h"
__global__
void
kReduceObcGbsaSoftcoreBornSum_kernel
()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaSoftcoreBornSum_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
...
...
@@ -226,7 +258,7 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel()
void
kReduceObcGbsaSoftcoreBornSum
(
gpuContext
gpu
)
{
// printf("kReduceObcGbsaSoftcoreBornSum\n");
kReduceObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
kReduceObcGbsaSoftcoreBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
threads_per_block
>>>
();
gpu
->
bRecalculateBornRadii
=
false
;
LAUNCHERROR
(
"kReduceObcGbsaSoftcoreBornSum"
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment