"vscode:/vscode.git/clone" did not exist on "db4efbf4bcb851a185879099ac01fdc61e34a062"
Commit 81cd9f5f authored by Peter Eastman
Browse files

Fixed problems on compute capability 1.2 GPUs

parent ff5f5d5a
...@@ -60,7 +60,7 @@ void GetSettleSim(gpuContext gpu) ...@@ -60,7 +60,7 @@ void GetSettleSim(gpuContext gpu)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1)
...@@ -242,7 +242,7 @@ void kApplyFirstSettle(gpuContext gpu) ...@@ -242,7 +242,7 @@ void kApplyFirstSettle(gpuContext gpu)
__global__ void __global__ void
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1)
......
...@@ -67,7 +67,7 @@ void GetShakeHSim(gpuContext gpu) ...@@ -67,7 +67,7 @@ void GetShakeHSim(gpuContext gpu)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1)
...@@ -238,7 +238,7 @@ void kApplyFirstShake(gpuContext gpu) ...@@ -238,7 +238,7 @@ void kApplyFirstShake(gpuContext gpu)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1)
...@@ -430,7 +430,7 @@ void kApplySecondShake_kernel() ...@@ -430,7 +430,7 @@ void kApplySecondShake_kernel()
__global__ void __global__ void
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_SHAKE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_SHAKE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_SHAKE_THREADS_PER_BLOCK, 1)
......
...@@ -92,7 +92,7 @@ void kVerletUpdatePart2(gpuContext gpu) ...@@ -92,7 +92,7 @@ void kVerletUpdatePart2(gpuContext gpu)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
...@@ -122,7 +122,7 @@ void kVerletUpdatePart1_kernel() ...@@ -122,7 +122,7 @@ void kVerletUpdatePart1_kernel()
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1) __launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1) __launch_bounds__(128, 1)
#else #else
__launch_bounds__(64, 1) __launch_bounds__(64, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(256, 1) __launch_bounds__(256, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1) __launch_bounds__(128, 1)
#else #else
__launch_bounds__(64, 1) __launch_bounds__(64, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
......
...@@ -1576,7 +1576,7 @@ static void kReduce_dBorn(amoebaGpuContext amoebaGpu ) ...@@ -1576,7 +1576,7 @@ static void kReduce_dBorn(amoebaGpuContext amoebaGpu )
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -1633,7 +1633,7 @@ __launch_bounds__(G8X_THREADS_PER_BLOCK, 1) ...@@ -1633,7 +1633,7 @@ __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(512, 1) __launch_bounds__(512, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1) __launch_bounds__(128, 1)
#else #else
__launch_bounds__(64, 1) __launch_bounds__(64, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1) __launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(96, 1) __launch_bounds__(96, 1)
#else #else
__launch_bounds__(32, 1) __launch_bounds__(32, 1)
......
...@@ -298,7 +298,7 @@ __device__ void bicubic( float4 y, float4 y1i, float4 y2i, float4 y12i, float x1 ...@@ -298,7 +298,7 @@ __device__ void bicubic( float4 y, float4 y1i, float4 y2i, float4 y12i, float x1
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_LOCALFORCES_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_LOCALFORCES_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_LOCALFORCES_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_LOCALFORCES_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_LOCALFORCES_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_LOCALFORCES_THREADS_PER_BLOCK, 1)
......
...@@ -54,7 +54,7 @@ __device__ static void crossVector3( float* vector1, float* vector2, float* vect ...@@ -54,7 +54,7 @@ __device__ static void crossVector3( float* vector1, float* vector2, float* vect
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -345,7 +345,7 @@ void amoebaMapTorqueToForce_kernel( float* torque, int maxDiff, float* tempElecF ...@@ -345,7 +345,7 @@ void amoebaMapTorqueToForce_kernel( float* torque, int maxDiff, float* tempElecF
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -513,7 +513,7 @@ void amoebaMapTorqueToForceOld_kernel( float* torque, int maxDiff, float* tempEl ...@@ -513,7 +513,7 @@ void amoebaMapTorqueToForceOld_kernel( float* torque, int maxDiff, float* tempEl
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -566,7 +566,7 @@ void amoebaMapTorqueReduce_kernel( ...@@ -566,7 +566,7 @@ void amoebaMapTorqueReduce_kernel(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -620,7 +620,7 @@ void amoebaMapTorqueReduce_kernel2( ...@@ -620,7 +620,7 @@ void amoebaMapTorqueReduce_kernel2(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -239,7 +239,7 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie ...@@ -239,7 +239,7 @@ __device__ static int debugAccumulate( int index, float4* debugArray, float* fie
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -273,7 +273,7 @@ void kInitializeMutualInducedAndGkField_kernel( ...@@ -273,7 +273,7 @@ void kInitializeMutualInducedAndGkField_kernel(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -342,7 +342,7 @@ void kReduceMutualInducedAndGkFieldDelta_kernel( float* arrayOfDeltas1, float* a ...@@ -342,7 +342,7 @@ void kReduceMutualInducedAndGkFieldDelta_kernel( float* arrayOfDeltas1, float* a
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -375,7 +375,7 @@ void kSorUpdateMutualInducedAndGkField_kernel( ...@@ -375,7 +375,7 @@ void kSorUpdateMutualInducedAndGkField_kernel(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1) __launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(128, 1) __launch_bounds__(128, 1)
#else #else
__launch_bounds__(64, 1) __launch_bounds__(64, 1)
......
...@@ -106,7 +106,7 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( MutualInducedParticle ...@@ -106,7 +106,7 @@ __device__ void calculateMutualInducedFieldPairIxn_kernel( MutualInducedParticle
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -134,7 +134,7 @@ void kInitializeMutualInducedField_kernel( ...@@ -134,7 +134,7 @@ void kInitializeMutualInducedField_kernel(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -182,7 +182,7 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe ...@@ -182,7 +182,7 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
......
...@@ -163,7 +163,7 @@ void kComputeAmoebaBsplines_kernel() ...@@ -163,7 +163,7 @@ void kComputeAmoebaBsplines_kernel()
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1) __launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(512, 1) __launch_bounds__(512, 1)
#else #else
__launch_bounds__(256, 1) __launch_bounds__(256, 1)
......
...@@ -34,7 +34,7 @@ void GetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext amoebaGpu) ...@@ -34,7 +34,7 @@ void GetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpuContext amoebaGpu)
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -83,7 +83,7 @@ static void kReducePmeEFieldPolar_kernel( unsigned int fieldComponents, unsigned ...@@ -83,7 +83,7 @@ static void kReducePmeEFieldPolar_kernel( unsigned int fieldComponents, unsigned
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -228,7 +228,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce ...@@ -228,7 +228,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -256,7 +256,7 @@ static void kInitializeMutualInducedField_kernel( ...@@ -256,7 +256,7 @@ static void kInitializeMutualInducedField_kernel(
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -313,7 +313,7 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar ...@@ -313,7 +313,7 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
...@@ -8,7 +8,7 @@ typedef unsigned int uint; ...@@ -8,7 +8,7 @@ typedef unsigned int uint;
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -73,7 +73,7 @@ void METHOD_NAME(kCalculateAmoebaReduce, N2ToNBlockLevel)( float *N2Array, float ...@@ -73,7 +73,7 @@ void METHOD_NAME(kCalculateAmoebaReduce, N2ToNBlockLevel)( float *N2Array, float
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment