Commit 77742df8 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Modified threadsPerBlock for several kernels and added launch_bounds

parent dacebfd3
...@@ -162,7 +162,15 @@ struct Atom { ...@@ -162,7 +162,15 @@ struct Atom {
float bornRadiusScaleFactor; float bornRadiusScaleFactor;
}; };
__global__ void kClearGBVISoftcoreBornSum_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kClearGBVISoftcoreBornSum_kernel()
{ {
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers) while (pos < cSim.stride * cSim.nonbondOutputBuffers)
...@@ -173,7 +181,7 @@ __global__ void kClearGBVISoftcoreBornSum_kernel() ...@@ -173,7 +181,7 @@ __global__ void kClearGBVISoftcoreBornSum_kernel()
} }
void kClearGBVISoftcoreBornSum(gpuContext gpu) { void kClearGBVISoftcoreBornSum(gpuContext gpu) {
kClearGBVISoftcoreBornSum_kernel<<<gpu->sim.blocks, 384>>>(); kClearGBVISoftcoreBornSum_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
} }
__global__ __global__
...@@ -284,7 +292,7 @@ void kReduceGBVISoftcoreBornSum_kernel() ...@@ -284,7 +292,7 @@ void kReduceGBVISoftcoreBornSum_kernel()
void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
{ {
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
kReduceGBVISoftcoreBornSum_kernel<<<gpu->sim.blocks, 384>>>(); kReduceGBVISoftcoreBornSum_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
LAUNCHERROR("kReduceGBVISoftcoreBornSum"); LAUNCHERROR("kReduceGBVISoftcoreBornSum");
} }
...@@ -333,7 +341,15 @@ void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) ...@@ -333,7 +341,15 @@ void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
*outDerivative = -30.0f*ratio2*( 1.0f + ratio*(ratio - 2.0f))/denominator; *outDerivative = -30.0f*ratio2*( 1.0f + ratio*(ratio - 2.0f))/denominator;
} }
__global__ void kReduceGBVIBornSumQuinticScaling_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceGBVIBornSumQuinticScaling_kernel()
{ {
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x); unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
...@@ -380,7 +396,7 @@ __global__ void kReduceGBVIBornSumQuinticScaling_kernel() ...@@ -380,7 +396,7 @@ __global__ void kReduceGBVIBornSumQuinticScaling_kernel()
void kReduceGBVIBornSumQuinticScaling( freeEnergyGpuContext freeEnergyGpu ) void kReduceGBVIBornSumQuinticScaling( freeEnergyGpuContext freeEnergyGpu )
{ {
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
kReduceGBVIBornSumQuinticScaling_kernel<<<gpu->sim.blocks, 384>>>(); kReduceGBVIBornSumQuinticScaling_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
LAUNCHERROR("kReduceGBVIBornSumQuinticScaling_kernel"); LAUNCHERROR("kReduceGBVIBornSumQuinticScaling_kernel");
} }
......
...@@ -58,7 +58,15 @@ extern "C" void SetCalculateObcGbsaSoftcoreBornSumSim( freeEnergyGpuContext free ...@@ -58,7 +58,15 @@ extern "C" void SetCalculateObcGbsaSoftcoreBornSumSim( freeEnergyGpuContext free
RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreBornSumSim copy to gbsaSimDev failed."); RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreBornSumSim copy to gbsaSimDev failed.");
} }
__global__ void kClearObcGbsaSoftcoreBornSum_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kClearObcGbsaSoftcoreBornSum_kernel()
{ {
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers) while (pos < cSim.stride * cSim.nonbondOutputBuffers)
...@@ -68,7 +76,15 @@ __global__ void kClearObcGbsaSoftcoreBornSum_kernel() ...@@ -68,7 +76,15 @@ __global__ void kClearObcGbsaSoftcoreBornSum_kernel()
} }
} }
__global__ void kClearSoftcoreBornForces_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kClearSoftcoreBornForces_kernel()
{ {
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x; unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers) while (pos < cSim.stride * cSim.nonbondOutputBuffers)
...@@ -81,17 +97,25 @@ __global__ void kClearSoftcoreBornForces_kernel() ...@@ -81,17 +97,25 @@ __global__ void kClearSoftcoreBornForces_kernel()
void kClearSoftcoreBornForces(gpuContext gpu) void kClearSoftcoreBornForces(gpuContext gpu)
{ {
// printf("kClearSoftcoreBornForces\n"); // printf("kClearSoftcoreBornForces\n");
kClearSoftcoreBornForces_kernel<<<gpu->sim.blocks, 384>>>(); kClearSoftcoreBornForces_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
LAUNCHERROR("kClearSoftcoreBornForces"); LAUNCHERROR("kClearSoftcoreBornForces");
} }
void kClearObcGbsaSoftcoreBornSum(gpuContext gpu) void kClearObcGbsaSoftcoreBornSum(gpuContext gpu)
{ {
// printf("kClearObcGbsaBornSum\n"); // printf("kClearObcGbsaBornSum\n");
kClearObcGbsaSoftcoreBornSum_kernel<<<gpu->sim.blocks, 384>>>(); kClearObcGbsaSoftcoreBornSum_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
} }
__global__ void kReduceObcGbsaSoftcoreBornForces_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceObcGbsaSoftcoreBornForces_kernel()
{ {
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x); unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
float energy = 0.0f; float energy = 0.0f;
...@@ -153,7 +177,7 @@ __global__ void kReduceObcGbsaSoftcoreBornForces_kernel() ...@@ -153,7 +177,7 @@ __global__ void kReduceObcGbsaSoftcoreBornForces_kernel()
void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu ){ void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu ){
kReduceObcGbsaSoftcoreBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>(); kReduceObcGbsaSoftcoreBornForces_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
LAUNCHERROR("kReduceObcGbsaSoftcoreBornForces"); LAUNCHERROR("kReduceObcGbsaSoftcoreBornForces");
} }
...@@ -191,7 +215,15 @@ void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu ){ ...@@ -191,7 +215,15 @@ void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu ){
#define METHOD_NAME(a, b) a##PeriodicByWarp##b #define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaSoftcoreBornSum.h" #include "kCalculateObcGbsaSoftcoreBornSum.h"
__global__ void kReduceObcGbsaSoftcoreBornSum_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceObcGbsaSoftcoreBornSum_kernel()
{ {
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x); unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
...@@ -226,7 +258,7 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel() ...@@ -226,7 +258,7 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel()
void kReduceObcGbsaSoftcoreBornSum(gpuContext gpu) void kReduceObcGbsaSoftcoreBornSum(gpuContext gpu)
{ {
// printf("kReduceObcGbsaSoftcoreBornSum\n"); // printf("kReduceObcGbsaSoftcoreBornSum\n");
kReduceObcGbsaSoftcoreBornSum_kernel<<<gpu->sim.blocks, 384>>>(); kReduceObcGbsaSoftcoreBornSum_kernel<<<gpu->sim.blocks, gpu->sim.threads_per_block>>>();
gpu->bRecalculateBornRadii = false; gpu->bRecalculateBornRadii = false;
LAUNCHERROR("kReduceObcGbsaSoftcoreBornSum"); LAUNCHERROR("kReduceObcGbsaSoftcoreBornSum");
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment