Commit e47cf907 authored by peastman
Browse files

Bug fixes to parameter derivatives for OpenCL CustomGBForce

parent dbdf1c68
......@@ -28,6 +28,7 @@ __kernel void computeN2Energy(
#endif
PARAMETER_ARGUMENTS) {
mixed energy = 0;
INIT_PARAM_DERIVS
// First loop: process tiles that contain exclusions.
......@@ -74,6 +75,7 @@ __kernel void computeN2Energy(
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
......@@ -140,6 +142,7 @@ __kernel void computeN2Energy(
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1.0f;
#ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
if (!isExcluded) {
......@@ -291,6 +294,7 @@ __kernel void computeN2Energy(
atom2 = atomIndices[j];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
dEdR /= -r;
energy += tempEnergy;
......@@ -347,6 +351,7 @@ __kernel void computeN2Energy(
atom2 = atomIndices[j];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
dEdR /= -r;
energy += tempEnergy;
......@@ -400,4 +405,5 @@ __kernel void computeN2Energy(
pos++;
}
energyBuffer[get_global_id(0)] += energy;
SAVE_PARAM_DERIVS
}
......@@ -267,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS;
......
......@@ -75,6 +75,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE
}
value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF
}
#endif
......@@ -133,6 +134,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
}
value += tempValue1;
local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF
}
#endif
......@@ -144,23 +147,26 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000));
unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value;
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
#endif
STORE_PARAM_DERIVS1
}
// Write results.
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE+tgx;
atom_add(&global_value[offset], (long) (local_value[tgx]*0x100000000));
unsigned int offset2 = y*TILE_SIZE+tgx;
atom_add(&global_value[offset2], (long) (local_value[tgx]*0x100000000));
#else
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += local_value[tgx];
unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx];
#endif
STORE_PARAM_DERIVS2
}
}
}
......@@ -260,6 +266,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE
value += tempValue1;
local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
}
......@@ -305,17 +313,21 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE
value += tempValue1;
local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
}
}
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000));
unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value;
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
#endif
STORE_PARAM_DERIVS1
}
}
......@@ -329,11 +341,13 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif
if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom2], (long) (local_value[tgx]*0x100000000));
unsigned int offset2 = atom2;
atom_add(&global_value[offset2], (long) (local_value[tgx]*0x100000000));
#else
unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += local_value[tgx];
unsigned int offset2 = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset2] += local_value[tgx];
#endif
STORE_PARAM_DERIVS2
}
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment