Commit 19d2885a authored by Lee-Ping
Browse files

Merge github.com:SimTk/openmm

parents 99ef4344 57a6768e
...@@ -736,6 +736,8 @@ public: ...@@ -736,6 +736,8 @@ public:
*/ */
class OpenCLContext::ForcePreComputation { class OpenCLContext::ForcePreComputation {
public: public:
virtual ~ForcePreComputation() {
}
/** /**
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
...@@ -752,6 +754,8 @@ public: ...@@ -752,6 +754,8 @@ public:
*/ */
class OpenCLContext::ForcePostComputation { class OpenCLContext::ForcePostComputation {
public: public:
virtual ~ForcePostComputation() {
}
/** /**
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
......
...@@ -741,7 +741,7 @@ private: ...@@ -741,7 +741,7 @@ private:
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel { class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public: public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomGBForceKernel(name, platform), OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL), hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL),
valueBuffers(NULL), longValueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) { valueBuffers(NULL), longValueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
} }
~OpenCLCalcCustomGBForceKernel(); ~OpenCLCalcCustomGBForceKernel();
...@@ -775,6 +775,7 @@ private: ...@@ -775,6 +775,7 @@ private:
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs; OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain;
OpenCLArray* longEnergyDerivs; OpenCLArray* longEnergyDerivs;
OpenCLArray* globals; OpenCLArray* globals;
OpenCLArray* valueBuffers; OpenCLArray* valueBuffers;
......
...@@ -19,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -19,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}) SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${OPENCL_LIBRARIES} ${PTHREADS_LIB}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-msse2 -DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
...@@ -107,8 +107,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -107,8 +107,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
if (i != deviceIndex && deviceIndex >= 0 && deviceIndex < (int) devices.size()) if (i != deviceIndex && deviceIndex >= 0 && deviceIndex < (int) devices.size())
continue; continue;
if (platformVendor == "Apple" && devices[i].getInfo<CL_DEVICE_VENDOR>() == "AMD") if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU || devices[i].getInfo<CL_DEVICE_VENDOR>() == "AMD"))
continue; // Don't use AMD GPUs on OS X due to serious bugs. continue; // The CPU device on OS X won't work correctly, and there are serious bugs using AMD GPUs.
int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0]; int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
int processingElementsPerComputeUnit = 8; int processingElementsPerComputeUnit = 8;
if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) { if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
...@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize); paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
if (useDoublePrecision) { if (useDoublePrecision) {
posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq"); posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm"); velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm");
...@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Create utilities objects. // Create utilities objects.
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
integration = new OpenCLIntegrationUtilities(*this, system); integration = new OpenCLIntegrationUtilities(*this, system);
expression = new OpenCLExpressionUtilities(*this); expression = new OpenCLExpressionUtilities(*this);
} }
......
...@@ -2654,6 +2654,8 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() { ...@@ -2654,6 +2654,8 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete computedValues; delete computedValues;
if (energyDerivs != NULL) if (energyDerivs != NULL)
delete energyDerivs; delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
if (longEnergyDerivs != NULL) if (longEnergyDerivs != NULL)
delete longEnergyDerivs; delete longEnergyDerivs;
if (globals != NULL) if (globals != NULL)
...@@ -2804,7 +2806,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2804,7 +2806,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
} }
else else
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives", true); energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives", true);
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
// Create the kernels. // Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
...@@ -3094,6 +3097,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3094,6 +3097,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index; extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n"; compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
} }
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivChain->getBuffers()[i];
string index = cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict derivChain" << index;
}
if (useLong) { if (useLong) {
extraArgs << ", __global const long* restrict derivBuffersIn"; extraArgs << ", __global const long* restrict derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i) for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
...@@ -3145,6 +3153,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3145,6 +3153,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
// Record values. // Record values.
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = cl.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
compute << "forceBuffers[index] = forceBuffers[index]+force;\n"; compute << "forceBuffers[index] = forceBuffers[index]+force;\n";
for (int i = 1; i < force.getNumComputedValues(); i++) { for (int i = 1; i < force.getNumComputedValues(); i++) {
compute << "real totalDeriv"<<i<<" = dV"<<i<<"dV0"; compute << "real totalDeriv"<<i<<" = dV"<<i<<"dV0";
...@@ -3155,7 +3167,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3155,7 +3167,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = cl.intToString(i+1); string index = cl.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n"; compute << "derivChain" << index << "[index] = deriv" << index << ";\n";
} }
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
...@@ -3292,9 +3304,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3292,9 +3304,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos) if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++) {
if (needChainForValue[i]) { if (needChainForValue[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivChain->getBuffers()[i];
string paramName = prefix+"dEdV"+cl.intToString(i+1); string paramName = prefix+"dEdV"+cl.intToString(i+1);
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
} }
...@@ -3487,6 +3499,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3487,6 +3499,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
perParticleEnergyKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory()); perParticleEnergyKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivs->getBuffers()[i].getMemory()); perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivChain->getBuffers()[i].getMemory());
if (useLong) if (useLong)
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer());
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
......
...@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p ...@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1; searchPos = nextPos+1;
} }
devices.push_back(deviceIndexProperty.substr(searchPos)); devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) { try {
if (devices[i].length() > 0) { for (int i = 0; i < (int) devices.size(); i++) {
unsigned int deviceIndex; if (devices[i].length() > 0) {
stringstream(devices[i]) >> deviceIndex; unsigned int deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this)); stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
}
} }
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
} }
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
stringstream deviceIndex, deviceName; stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) { for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) { if (i > 0) {
......
...@@ -75,7 +75,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -75,7 +75,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = select((real) 0, epssig6*(sig6-1), includeInteraction); real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) { if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF; real x = r-LJ_SWITCH_CUTOFF;
...@@ -85,6 +85,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) { ...@@ -85,6 +85,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
ljEnergy *= switchValue; ljEnergy *= switchValue;
} }
#endif #endif
ljEnergy = select((real) 0, ljEnergy, includeInteraction);
tempEnergy += ljEnergy; tempEnergy += ljEnergy;
#endif #endif
#if HAS_COULOMB #if HAS_COULOMB
......
...@@ -67,7 +67,7 @@ __kernel void computeN2Energy( ...@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
...@@ -117,7 +117,7 @@ __kernel void computeN2Energy( ...@@ -117,7 +117,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
...@@ -279,7 +279,7 @@ __kernel void computeN2Energy( ...@@ -279,7 +279,7 @@ __kernel void computeN2Energy(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
...@@ -317,7 +317,7 @@ __kernel void computeN2Energy( ...@@ -317,7 +317,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
......
...@@ -67,7 +67,7 @@ __kernel void computeN2Energy( ...@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -133,7 +133,7 @@ __kernel void computeN2Energy( ...@@ -133,7 +133,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -285,7 +285,7 @@ __kernel void computeN2Energy( ...@@ -285,7 +285,7 @@ __kernel void computeN2Energy(
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -341,7 +341,7 @@ __kernel void computeN2Energy( ...@@ -341,7 +341,7 @@ __kernel void computeN2Energy(
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -107,7 +107,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -107,7 +107,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real tempValue1 = 0; real tempValue1 = 0;
...@@ -252,7 +252,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -252,7 +252,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
...@@ -285,7 +285,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -285,7 +285,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real tempValue1 = 0; real tempValue1 = 0;
......
...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -116,7 +116,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -116,7 +116,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -251,7 +251,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -251,7 +251,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -296,7 +296,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -296,7 +296,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
...@@ -91,7 +91,7 @@ __kernel void computeInteractionGroups( ...@@ -91,7 +91,7 @@ __kernel void computeInteractionGroups(
if (!isExcluded && r2 < CUTOFF_SQUARED) { if (!isExcluded && r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f; real dEdR = 0.0f;
real tempEnergy = 0.0f; real tempEnergy = 0.0f;
......
...@@ -67,7 +67,7 @@ __kernel void computeBornSum( ...@@ -67,7 +67,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius); float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -114,7 +114,7 @@ __kernel void computeBornSum( ...@@ -114,7 +114,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -268,7 +268,7 @@ __kernel void computeBornSum( ...@@ -268,7 +268,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -317,7 +317,7 @@ __kernel void computeBornSum( ...@@ -317,7 +317,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius); float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -437,7 +437,7 @@ __kernel void computeGBSAForce1( ...@@ -437,7 +437,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+j].bornRadius; real bornRadius2 = localData[tbx+j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -492,7 +492,7 @@ __kernel void computeGBSAForce1( ...@@ -492,7 +492,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -659,7 +659,7 @@ __kernel void computeGBSAForce1( ...@@ -659,7 +659,7 @@ __kernel void computeGBSAForce1(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -707,7 +707,7 @@ __kernel void computeGBSAForce1( ...@@ -707,7 +707,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius; real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
...@@ -71,7 +71,7 @@ __kernel void computeBornSum( ...@@ -71,7 +71,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) { if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
...@@ -120,7 +120,7 @@ __kernel void computeBornSum( ...@@ -120,7 +120,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -269,7 +269,7 @@ __kernel void computeBornSum( ...@@ -269,7 +269,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -331,7 +331,7 @@ __kernel void computeBornSum( ...@@ -331,7 +331,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius); float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y; real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) { if (params1.x < rScaledRadiusJ) {
...@@ -461,7 +461,7 @@ __kernel void computeGBSAForce1( ...@@ -461,7 +461,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -520,7 +520,7 @@ __kernel void computeGBSAForce1( ...@@ -520,7 +520,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) { if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -677,7 +677,7 @@ __kernel void computeGBSAForce1( ...@@ -677,7 +677,7 @@ __kernel void computeGBSAForce1(
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
...@@ -737,7 +737,7 @@ __kernel void computeGBSAForce1( ...@@ -737,7 +737,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
real bornRadius2 = localData[j].bornRadius; real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2; real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij); real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
...@@ -71,7 +71,7 @@ __kernel void computeNonbonded( ...@@ -71,7 +71,7 @@ __kernel void computeNonbonded(
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -128,7 +128,7 @@ __kernel void computeNonbonded( ...@@ -128,7 +128,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -297,7 +297,7 @@ __kernel void computeNonbonded( ...@@ -297,7 +297,7 @@ __kernel void computeNonbonded(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
...@@ -347,7 +347,7 @@ __kernel void computeNonbonded( ...@@ -347,7 +347,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC #ifdef USE_SYMMETRIC
......
...@@ -72,7 +72,7 @@ __kernel void computeNonbonded( ...@@ -72,7 +72,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -140,7 +140,7 @@ __kernel void computeNonbonded( ...@@ -140,7 +140,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
...@@ -307,7 +307,7 @@ __kernel void computeNonbonded( ...@@ -307,7 +307,7 @@ __kernel void computeNonbonded(
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
...@@ -371,7 +371,7 @@ __kernel void computeNonbonded( ...@@ -371,7 +371,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
#endif #endif
real invR = RSQRT(r2); real invR = RSQRT(r2);
real r = RECIP(invR); real r = r2*invR;
unsigned int atom2 = j; unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j]; atom2 = atomIndices[j];
......
...@@ -25,6 +25,7 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -25,6 +25,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library # Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG}) ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET}) TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
IF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" ) IF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
...@@ -39,13 +40,13 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -39,13 +40,13 @@ FOREACH(TEST_PROG ${TEST_PROGS})
SET(NONBOND_TEST "TestOpenCLNonbondedForce2") SET(NONBOND_TEST "TestOpenCLNonbondedForce2")
ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG}) ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG})
SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES COMPILE_FLAGS "-msse2 ${NONBOND_DEFINE_STRING}" ) SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${NONBOND_DEFINE_STRING}" )
ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST}) ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST})
# OBC # OBC
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1") SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS "-msse2 ${DEFINE_STRING}" ) SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${DEFINE_STRING}" )
IF( INCLUDE_SERIALIZATION ) IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} ) TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
...@@ -54,8 +55,6 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -54,8 +55,6 @@ FOREACH(TEST_PROG ${TEST_PROGS})
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET}) TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
ELSE( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS -msse2)
ENDIF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" ) ENDIF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single) ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
IF (OPENMM_BUILD_OPENCL_DOUBLE_PRECISION_TESTS) IF (OPENMM_BUILD_OPENCL_DOUBLE_PRECISION_TESTS)
......
...@@ -248,7 +248,7 @@ void testMembrane() { ...@@ -248,7 +248,7 @@ void testMembrane() {
for (int i = 0; i < (int) forces.size(); ++i) for (int i = 0; i < (int) forces.size(); ++i)
norm += forces[i].dot(forces[i]); norm += forces[i].dot(forces[i]);
norm = std::sqrt(norm); norm = std::sqrt(norm);
const double stepSize = 1e-3; const double stepSize = 1e-2;
double step = 0.5*stepSize/norm; double step = 0.5*stepSize/norm;
vector<Vec3> positions2(numParticles), positions3(numParticles); vector<Vec3> positions2(numParticles), positions3(numParticles);
for (int i = 0; i < (int) positions.size(); ++i) { for (int i = 0; i < (int) positions.size(); ++i) {
......
...@@ -71,7 +71,7 @@ void testSingleParticle() { ...@@ -71,7 +71,7 @@ void testSingleParticle() {
double bornRadius = 0.15-0.009; // dielectric offset double bornRadius = 0.15-0.009; // dielectric offset
double eps0 = EPSILON0; double eps0 = EPSILON0;
double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius; double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius;
double extendedRadius = bornRadius+0.14; // probe radius double extendedRadius = 0.15+0.14; // probe radius
double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01); ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
......
...@@ -50,7 +50,7 @@ using namespace std; ...@@ -50,7 +50,7 @@ using namespace std;
static OpenCLPlatform platform; static OpenCLPlatform platform;
void testGaussian() { void testGaussian() {
int numAtoms = 5000; int numAtoms = 10000;
System system; System system;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
...@@ -82,10 +82,10 @@ void testGaussian() { ...@@ -82,10 +82,10 @@ void testGaussian() {
double c2 = var-mean*mean; double c2 = var-mean*mean;
double c3 = skew-3*var*mean+2*mean*mean*mean; double c3 = skew-3*var*mean+2*mean*mean*mean;
double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean; double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean;
ASSERT_EQUAL_TOL(0.0, mean, 3.0/sqrt((double)numValues)); ASSERT_EQUAL_TOL(0.0, mean, 4.0/sqrt((double)numValues));
ASSERT_EQUAL_TOL(1.0, c2, 3.0/pow(numValues, 1.0/3.0)); ASSERT_EQUAL_TOL(1.0, c2, 4.0/pow(numValues, 1.0/3.0));
ASSERT_EQUAL_TOL(0.0, c3, 3.0/pow(numValues, 1.0/4.0)); ASSERT_EQUAL_TOL(0.0, c3, 4.0/pow(numValues, 1.0/4.0));
ASSERT_EQUAL_TOL(0.0, c4, 3.0/pow(numValues, 1.0/4.0)); ASSERT_EQUAL_TOL(0.0, c4, 4.0/pow(numValues, 1.0/4.0));
} }
void testRandomVelocities() { void testRandomVelocities() {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment