Commit 1a94f435 authored by peastman's avatar peastman
Browse files

Merge pull request #495 from peastman/unsigned

Fixed a rare memory corruption error when roundoff causes tile indices to be wrong
parents 6b2c8d58 ece75910
...@@ -190,7 +190,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -190,7 +190,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -203,7 +203,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -203,7 +203,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -166,7 +166,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -166,7 +166,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -179,7 +179,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -179,7 +179,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -226,7 +226,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -226,7 +226,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -239,7 +239,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -239,7 +239,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -590,7 +590,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -590,7 +590,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -603,7 +603,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -603,7 +603,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -333,7 +333,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -333,7 +333,7 @@ extern "C" __global__ void computeNonbonded(
bool includeTile = true; bool includeTile = true;
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -346,7 +346,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -346,7 +346,7 @@ extern "C" __global__ void computeNonbonded(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -200,7 +200,7 @@ __kernel void computeN2Energy( ...@@ -200,7 +200,7 @@ __kernel void computeN2Energy(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -213,7 +213,7 @@ __kernel void computeN2Energy( ...@@ -213,7 +213,7 @@ __kernel void computeN2Energy(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -216,7 +216,7 @@ __kernel void computeN2Energy( ...@@ -216,7 +216,7 @@ __kernel void computeN2Energy(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -229,7 +229,7 @@ __kernel void computeN2Energy( ...@@ -229,7 +229,7 @@ __kernel void computeN2Energy(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -174,7 +174,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -174,7 +174,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -187,7 +187,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -187,7 +187,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -184,7 +184,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -184,7 +184,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -197,7 +197,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -197,7 +197,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -186,7 +186,7 @@ __kernel void computeBornSum( ...@@ -186,7 +186,7 @@ __kernel void computeBornSum(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -199,7 +199,7 @@ __kernel void computeBornSum( ...@@ -199,7 +199,7 @@ __kernel void computeBornSum(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -574,7 +574,7 @@ __kernel void computeGBSAForce1( ...@@ -574,7 +574,7 @@ __kernel void computeGBSAForce1(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -587,7 +587,7 @@ __kernel void computeGBSAForce1( ...@@ -587,7 +587,7 @@ __kernel void computeGBSAForce1(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -192,7 +192,7 @@ __kernel void computeBornSum( ...@@ -192,7 +192,7 @@ __kernel void computeBornSum(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -205,7 +205,7 @@ __kernel void computeBornSum( ...@@ -205,7 +205,7 @@ __kernel void computeBornSum(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -607,7 +607,7 @@ __kernel void computeGBSAForce1( ...@@ -607,7 +607,7 @@ __kernel void computeGBSAForce1(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -620,7 +620,7 @@ __kernel void computeGBSAForce1( ...@@ -620,7 +620,7 @@ __kernel void computeGBSAForce1(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -213,7 +213,7 @@ __kernel void computeNonbonded( ...@@ -213,7 +213,7 @@ __kernel void computeNonbonded(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -226,7 +226,7 @@ __kernel void computeNonbonded( ...@@ -226,7 +226,7 @@ __kernel void computeNonbonded(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -230,7 +230,7 @@ __kernel void computeNonbonded( ...@@ -230,7 +230,7 @@ __kernel void computeNonbonded(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -243,7 +243,7 @@ __kernel void computeNonbonded( ...@@ -243,7 +243,7 @@ __kernel void computeNonbonded(
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -97,11 +97,11 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornS ...@@ -97,11 +97,11 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ bornS
// Extract the coordinates of this tile // Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y; int x, y;
AtomData1 data; AtomData1 data;
data.bornSum = 0; data.bornSum = 0;
if (pos < end) { if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -232,10 +232,10 @@ extern "C" __global__ void computeGKForces( ...@@ -232,10 +232,10 @@ extern "C" __global__ void computeGKForces(
// Extract the coordinates of this tile // Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y; int x, y;
AtomData2 data; AtomData2 data;
if (pos < end) { if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -471,10 +471,10 @@ extern "C" __global__ void computeChainRuleForce( ...@@ -471,10 +471,10 @@ extern "C" __global__ void computeChainRuleForce(
// Extract the coordinates of this tile // Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y; int x, y;
AtomData3 data; AtomData3 data;
if (pos < end) { if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
...@@ -766,8 +766,8 @@ extern "C" __global__ void computeEDiffForce( ...@@ -766,8 +766,8 @@ extern "C" __global__ void computeEDiffForce(
while (pos < end) { while (pos < end) {
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -207,10 +207,10 @@ extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forc ...@@ -207,10 +207,10 @@ extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE; const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y; int x, y;
AtomData data; AtomData data;
if (pos < end) { if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -234,14 +234,14 @@ extern "C" __global__ void computeElectrostatics( ...@@ -234,14 +234,14 @@ extern "C" __global__ void computeElectrostatics(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) if (numTiles <= maxTiles)
x = tiles[pos]; x = tiles[pos];
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -599,14 +599,14 @@ extern "C" __global__ void computeFixedField( ...@@ -599,14 +599,14 @@ extern "C" __global__ void computeFixedField(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) if (numTiles <= maxTiles)
x = tiles[pos]; x = tiles[pos];
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -337,14 +337,14 @@ extern "C" __global__ void computeInducedField( ...@@ -337,14 +337,14 @@ extern "C" __global__ void computeInducedField(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) if (numTiles <= maxTiles)
x = tiles[pos]; x = tiles[pos];
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
...@@ -321,14 +321,14 @@ extern "C" __global__ void computeElectrostatics( ...@@ -321,14 +321,14 @@ extern "C" __global__ void computeElectrostatics(
// Extract the coordinates of this tile. // Extract the coordinates of this tile.
unsigned int x, y; int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) if (numTiles <= maxTiles)
x = tiles[pos]; x = tiles[pos];
else else
#endif #endif
{ {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1); y += (x < y ? -1 : 1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment