Commit 823b9a75 authored by peastman
Browse files

Bug fixes to OpenCL on CPU

parent 68f1f485
......@@ -3239,13 +3239,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
load2 << "real temp_" << derivName << "_1 = 0;\n";
load2 << "real temp_" << derivName << "_2 = 0;\n";
tempDerivs1 << derivName << " += temp_" << derivName << "_1;\n";
if (deviceIsCpu)
tempDerivs2 << "local_" << derivName << "[j] += temp_" << derivName << "_2;\n";
else
tempDerivs2 << "local_" << derivName << "[tbx+tj] += temp_" << derivName << "_2;\n";
if (useLong) {
storeDeriv1 << "atom_add(&global_" << derivName << "[offset1], (long) (" << derivName << "*0x100000000));\n";
if (deviceIsCpu)
storeDeriv2 << "atom_add(&global_" << derivName << "[offset2], (long) (local_" << derivName << "[tgx]*0x100000000));\n";
else
storeDeriv2 << "atom_add(&global_" << derivName << "[offset2], (long) (local_" << derivName << "[get_local_id(0)]*0x100000000));\n";
}
else {
storeDeriv1 << "global_" << derivName << "[offset1] += " << derivName << ";\n";
if (deviceIsCpu)
storeDeriv2 << "global_" << derivName << "[offset2] += local_" << derivName << "[tgx];\n";
else
storeDeriv2 << "global_" << derivName << "[offset2] += local_" << derivName << "[get_local_id(0)];\n";
}
}
......
......@@ -87,11 +87,13 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000));
unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value;
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
#endif
STORE_PARAM_DERIVS1
}
}
else {
......@@ -274,11 +276,13 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&global_value[atom1], (long) (value*0x100000000));
unsigned int offset1 = atom1;
atom_add(&global_value[offset1], (long) (value*0x100000000));
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset] += value;
unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
global_value[offset1] += value;
#endif
STORE_PARAM_DERIVS1
}
}
else
......
......@@ -491,7 +491,7 @@ void testIllegalVariable() {
void testEnergyParameterDerivatives() {
// Create a box of particles.
const int numParticles = 30;
const int numParticles = 40;
const int numParameters = 4;
const double boxSize = 2.0;
const double delta = 1e-3;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment