Commit 2f1c9531 authored by Scott Le Grand
Browse files

No commit message

No commit message
parent c85128ac
...@@ -66,7 +66,15 @@ __device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount) ...@@ -66,7 +66,15 @@ __device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
__syncthreads(); __syncthreads();
} }
__global__ void kApplyCCMA_kernel(float4* atomPositions, bool addOldPosition) __global__ void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
kApplyCCMA_kernel(float4* atomPositions, bool addOldPosition)
{ {
// Initialize counters used for monitoring convergence and doing global thread synchronization. // Initialize counters used for monitoring convergence and doing global thread synchronization.
......
...@@ -78,7 +78,15 @@ void SetCustomBondGlobalParams(const vector<float>& paramValues) ...@@ -78,7 +78,15 @@ void SetCustomBondGlobalParams(const vector<float>& paramValues)
} }
__global__ void kCalculateCustomBondForces_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kCalculateCustomBondForces_kernel()
{ {
extern __shared__ float stack[]; extern __shared__ float stack[];
float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x]; float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x];
......
...@@ -82,7 +82,15 @@ void SetCustomExternalGlobalParams(const vector<float>& paramValues) ...@@ -82,7 +82,15 @@ void SetCustomExternalGlobalParams(const vector<float>& paramValues)
} }
__global__ void kCalculateCustomExternalForces_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kCalculateCustomExternalForces_kernel()
{ {
extern __shared__ float stack[]; extern __shared__ float stack[];
float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x]; float* variables = (float*) &stack[cSim.customExpressionStackSize*blockDim.x];
......
...@@ -92,7 +92,15 @@ inline __host__ __device__ float4 make_float4(int3 a) ...@@ -92,7 +92,15 @@ inline __host__ __device__ float4 make_float4(int3 a)
return make_float4((float) a.x, (float) a.y, (float) a.z, 0); return make_float4((float) a.x, (float) a.y, (float) a.z, 0);
} }
__global__ void kUpdateGridIndexAndFraction_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kUpdateGridIndexAndFraction_kernel()
{ {
unsigned int tnb = blockDim.x * gridDim.x; unsigned int tnb = blockDim.x * gridDim.x;
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -114,7 +122,15 @@ __global__ void kUpdateGridIndexAndFraction_kernel() ...@@ -114,7 +122,15 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
* For each grid point, find the range of sorted atoms associated with that point. * For each grid point, find the range of sorted atoms associated with that point.
*/ */
__global__ void kFindAtomRangeForGrid_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kFindAtomRangeForGrid_kernel()
{ {
int thread = blockIdx.x*blockDim.x+threadIdx.x; int thread = blockIdx.x*blockDim.x+threadIdx.x;
int start = (cSim.atoms*thread)/(blockDim.x*gridDim.x); int start = (cSim.atoms*thread)/(blockDim.x*gridDim.x);
...@@ -147,7 +163,15 @@ __global__ void kFindAtomRangeForGrid_kernel() ...@@ -147,7 +163,15 @@ __global__ void kFindAtomRangeForGrid_kernel()
} }
} }
__global__ void kUpdateBsplines_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kUpdateBsplines_kernel()
{ {
unsigned int tnb = blockDim.x * gridDim.x; unsigned int tnb = blockDim.x * gridDim.x;
unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -217,7 +241,15 @@ __global__ void kUpdateBsplines_kernel() ...@@ -217,7 +241,15 @@ __global__ void kUpdateBsplines_kernel()
} }
} }
__global__ void kGridSpreadCharge_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kGridSpreadCharge_kernel()
{ {
unsigned int numGridPoints = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z; unsigned int numGridPoints = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z;
unsigned int numThreads = gridDim.x*blockDim.x; unsigned int numThreads = gridDim.x*blockDim.x;
...@@ -254,7 +286,15 @@ __global__ void kGridSpreadCharge_kernel() ...@@ -254,7 +286,15 @@ __global__ void kGridSpreadCharge_kernel()
} }
} }
__global__ void kReciprocalConvolution_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kReciprocalConvolution_kernel()
{ {
const unsigned int gridSize = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z; const unsigned int gridSize = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z;
float expFactor = LOCAL_HACK_PI*LOCAL_HACK_PI/(cSim.alphaEwald*cSim.alphaEwald); float expFactor = LOCAL_HACK_PI*LOCAL_HACK_PI/(cSim.alphaEwald*cSim.alphaEwald);
...@@ -287,7 +327,15 @@ __global__ void kReciprocalConvolution_kernel() ...@@ -287,7 +327,15 @@ __global__ void kReciprocalConvolution_kernel()
cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy; cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy;
} }
__global__ void kGridInterpolateForce_kernel() __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kGridInterpolateForce_kernel()
{ {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < cSim.atoms; atom += blockDim.x*gridDim.x) for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < cSim.atoms; atom += blockDim.x*gridDim.x)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment