Commit e47cf907 authored by peastman
Browse files

Bug fixes to parameter derivatives for OpenCL CustomGBForce

parent dbdf1c68
...@@ -28,6 +28,7 @@ __kernel void computeN2Energy( ...@@ -28,6 +28,7 @@ __kernel void computeN2Energy(
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
...@@ -74,6 +75,7 @@ __kernel void computeN2Energy( ...@@ -74,6 +75,7 @@ __kernel void computeN2Energy(
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -140,6 +142,7 @@ __kernel void computeN2Energy( ...@@ -140,6 +142,7 @@ __kernel void computeN2Energy(
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1.0f;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1)); bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
if (!isExcluded) { if (!isExcluded) {
...@@ -291,6 +294,7 @@ __kernel void computeN2Energy( ...@@ -291,6 +294,7 @@ __kernel void computeN2Energy(
atom2 = atomIndices[j]; atom2 = atomIndices[j];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
energy += tempEnergy; energy += tempEnergy;
...@@ -347,6 +351,7 @@ __kernel void computeN2Energy( ...@@ -347,6 +351,7 @@ __kernel void computeN2Energy(
atom2 = atomIndices[j]; atom2 = atomIndices[j];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
energy += tempEnergy; energy += tempEnergy;
...@@ -400,4 +405,5 @@ __kernel void computeN2Energy( ...@@ -400,4 +405,5 @@ __kernel void computeN2Energy(
pos++; pos++;
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -267,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -267,6 +267,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
} }
value += tempValue1; value += tempValue1;
local_value[tbx+tj] += tempValue2; local_value[tbx+tj] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS; SYNC_WARPS;
......
...@@ -75,6 +75,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -75,6 +75,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE COMPUTE_VALUE
} }
value += tempValue1; value += tempValue1;
ADD_TEMP_DERIVS1
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -133,6 +134,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -133,6 +134,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
} }
value += tempValue1; value += tempValue1;
local_value[j] += tempValue2; local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
} }
#endif #endif
...@@ -144,23 +147,26 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -144,23 +147,26 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000)); unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value; global_value[offset1] += value;
#endif #endif
STORE_PARAM_DERIVS1
} }
// Write results. // Write results.
for (int tgx = 0; tgx < TILE_SIZE; tgx++) { for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE+tgx; unsigned int offset2 = y*TILE_SIZE+tgx;
atom_add(&global_value[offset], (long) (local_value[tgx]*0x100000000)); atom_add(&global_value[offset2], (long) (local_value[tgx]*0x100000000));
#else #else
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += local_value[tgx]; global_value[offset2] += local_value[tgx];
#endif #endif
STORE_PARAM_DERIVS2
} }
} }
} }
...@@ -260,6 +266,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -260,6 +266,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE COMPUTE_VALUE
value += tempValue1; value += tempValue1;
local_value[j] += tempValue2; local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
} }
...@@ -305,17 +313,21 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -305,17 +313,21 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
COMPUTE_VALUE COMPUTE_VALUE
value += tempValue1; value += tempValue1;
local_value[j] += tempValue2; local_value[j] += tempValue2;
ADD_TEMP_DERIVS1
ADD_TEMP_DERIVS2
} }
} }
// Write results for atom1. // Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000)); unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else #else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value; global_value[offset1] += value;
#endif #endif
STORE_PARAM_DERIVS1
} }
} }
...@@ -329,11 +341,13 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -329,11 +341,13 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
if (atom2 < PADDED_NUM_ATOMS) { if (atom2 < PADDED_NUM_ATOMS) {
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom2], (long) (local_value[tgx]*0x100000000)); unsigned int offset2 = atom2;
atom_add(&global_value[offset2], (long) (local_value[tgx]*0x100000000));
#else #else
unsigned int offset = atom2 + get_group_id(0)*PADDED_NUM_ATOMS; unsigned int offset2 = atom2 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += local_value[tgx]; global_value[offset2] += local_value[tgx];
#endif #endif
STORE_PARAM_DERIVS2
} }
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment