Commit a381a3ab authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 5ecc8e00 1f7866ad
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
__kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffers, __global const real4* restrict posq __kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffers, __global const real4* restrict posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
unsigned int index = get_global_id(0); unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
real4 pos = posq[index]; real4 pos = posq[index];
...@@ -12,4 +13,5 @@ __kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffer ...@@ -12,4 +13,5 @@ __kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffer
forceBuffers[index] = force; forceBuffers[index] = force;
index += get_global_size(0); index += get_global_size(0);
} }
SAVE_PARAM_DERIVS
} }
...@@ -74,6 +74,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -74,6 +74,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE COMPUTE_VALUE
} }
value += tempValue1; value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -123,6 +124,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -123,6 +124,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
} }
value += tempValue1; value += tempValue1;
local_value[tbx+tj] += tempValue2; local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -137,18 +140,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -137,18 +140,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results. // Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx; unsigned int offset1 = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (value*0x100000000)); atom_add(&global_value[offset1], (long) (value*0x100000000));
STORE_PARAM_DERIVS1
if (x != y) { if (x != y) {
offset = y*TILE_SIZE + tgx; unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0x100000000)); atom_add(&global_value[offset2], (long) (local_value[get_local_id(0)]*0x100000000));
STORE_PARAM_DERIVS2
} }
#else #else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = y*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset2 = y*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
if (x != y) STORE_PARAM_DERIVS1
if (x != y) {
global_value[offset2] += local_value[get_local_id(0)]; global_value[offset2] += local_value[get_local_id(0)];
STORE_PARAM_DERIVS2
}
#endif #endif
} }
...@@ -157,6 +165,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -157,6 +165,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -178,42 +188,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -178,42 +188,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
...@@ -261,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -261,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
} }
value += tempValue1; value += tempValue1;
local_value[tbx+tj] += tempValue2; local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS; SYNC_WARPS;
...@@ -294,6 +302,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -294,6 +302,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
} }
value += tempValue1; value += tempValue1;
local_value[tbx+tj] += tempValue2; local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -310,15 +320,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -310,15 +320,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned int atom2 = y*TILE_SIZE + tgx; unsigned int atom2 = y*TILE_SIZE + tgx;
#endif #endif
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000)); unsigned int offset1 = atom1;
if (atom2 < PADDED_NUM_ATOMS) atom_add(&global_value[offset1], (long) (value*0x100000000));
atom_add(&global_value[atom2], (long) (local_value[get_local_id(0)]*0x100000000)); STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2;
atom_add(&global_value[offset2], (long) (local_value[get_local_id(0)]*0x100000000));
STORE_PARAM_DERIVS2
}
#else #else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
global_value[offset1] += value; global_value[offset1] += value;
if (atom2 < PADDED_NUM_ATOMS) STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[get_local_id(0)]; global_value[offset2] += local_value[get_local_id(0)];
STORE_PARAM_DERIVS2
}
#endif #endif
} }
pos++; pos++;
......
...@@ -21,6 +21,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global ...@@ -21,6 +21,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global
for (int i = index+bufferSize; i < totalSize; i += bufferSize) for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += valueBuffers[i]; sum += valueBuffers[i];
#endif #endif
REDUCE_PARAM0_DERIV
// Now calculate other values // Now calculate other values
......
...@@ -25,7 +25,8 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec ...@@ -25,7 +25,8 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta, __kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy __global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues,
const real energy, __global mixed* restrict energyParamDerivs
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = get_global_id(0); int index = get_global_id(0);
......
...@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) { ...@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if (!isExcluded) { if (!isExcluded) {
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
COMPUTE_FORCE real switchValue = 1, switchDeriv = 0;
#if USE_SWITCH #if USE_SWITCH
if (r > SWITCH_CUTOFF) { if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF; real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5)); switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5)); switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
} }
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
#endif #endif
dEdR += tempForce*invR; dEdR += tempForce*invR;
} }
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -214,6 +214,8 @@ __kernel void computeNonbonded( ...@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0)); int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0)); int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
#else #else
...@@ -234,35 +236,31 @@ __kernel void computeNonbonded( ...@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment