Commit a381a3ab authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 5ecc8e00 1f7866ad
......@@ -4,6 +4,7 @@
__kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffers, __global const real4* restrict posq
PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
real4 pos = posq[index];
......@@ -12,4 +13,5 @@ __kernel void computeGradientChainRuleTerms(__global real4* restrict forceBuffer
forceBuffers[index] = force;
index += get_global_size(0);
}
SAVE_PARAM_DERIVS
}
......@@ -74,6 +74,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE
}
value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
......@@ -123,6 +124,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -137,18 +140,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (value*0x100000000));
unsigned int offset1 = x*TILE_SIZE + tgx;
atom_add(&global_value[offset1], (long) (value*0x100000000));
STORE_PARAM_DERIVS1
if (x != y) {
offset = y*TILE_SIZE + tgx;
atom_add(&global_value[offset], (long) (local_value[get_local_id(0)]*0x100000000));
unsigned int offset2 = y*TILE_SIZE + tgx;
atom_add(&global_value[offset2], (long) (local_value[get_local_id(0)]*0x100000000));
STORE_PARAM_DERIVS2
}
#else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = y*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
global_value[offset1] += value;
if (x != y)
STORE_PARAM_DERIVS1
if (x != y) {
global_value[offset2] += local_value[get_local_id(0)];
STORE_PARAM_DERIVS2
}
#endif
}
......@@ -157,6 +165,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
......@@ -178,42 +188,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......@@ -261,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS;
......@@ -294,6 +302,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -310,15 +320,23 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000));
if (atom2 < PADDED_NUM_ATOMS)
atom_add(&global_value[atom2], (long) (local_value[get_local_id(0)]*0x100000000));
unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2;
atom_add(&global_value[offset2], (long) (local_value[get_local_id(0)]*0x100000000));
STORE_PARAM_DERIVS2
}
#else
unsigned int offset1 = atom1 + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
global_value[offset1] += value;
if (atom2 < PADDED_NUM_ATOMS)
STORE_PARAM_DERIVS1
if (atom2 < PADDED_NUM_ATOMS) {
unsigned int offset2 = atom2 + warp*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[get_local_id(0)];
STORE_PARAM_DERIVS2
}
#endif
}
pos++;
......
......@@ -21,6 +21,7 @@ __kernel void computePerParticleValues(int bufferSize, int numBuffers, __global
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += valueBuffers[i];
#endif
REDUCE_PARAM0_DERIV
// Now calculate other values
......
......@@ -25,7 +25,8 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy
__global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues,
const real energy, __global mixed* restrict energyParamDerivs
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = get_global_id(0);
......
......@@ -4,15 +4,18 @@ if (!isExcluded && r2 < CUTOFF_SQUARED) {
if (!isExcluded) {
#endif
real tempForce = 0.0f;
COMPUTE_FORCE
real switchValue = 1, switchDeriv = 0;
#if USE_SWITCH
if (r > SWITCH_CUTOFF) {
real x = r-SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
real switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
}
#endif
COMPUTE_FORCE
#if USE_SWITCH
tempForce = tempForce*switchValue - tempEnergy*switchDeriv;
tempEnergy *= switchValue;
#endif
dEdR += tempForce*invR;
}
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
#else
......@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
includeTile = (nextToSkip != pos);
else
nextToSkip = end;
}
includeTile = (nextToSkip != pos);
#endif
if (includeTile) {
// Load the data for this tile.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment