Commit 19d2885a authored by Lee-Ping
Browse files

Merge github.com:SimTk/openmm

parents 99ef4344 57a6768e
......@@ -736,6 +736,8 @@ public:
*/
class OpenCLContext::ForcePreComputation {
public:
/**
 * Virtual destructor with an empty body. Because it is virtual, deleting a
 * subclass instance through a ForcePreComputation pointer runs the subclass
 * destructor as well.
 */
virtual ~ForcePreComputation() {
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
......@@ -752,6 +754,8 @@ public:
*/
class OpenCLContext::ForcePostComputation {
public:
/**
 * Virtual destructor with an empty body. Because it is virtual, deleting a
 * subclass instance through a ForcePostComputation pointer runs the subclass
 * destructor as well.
 */
virtual ~ForcePostComputation() {
}
/**
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
......
......@@ -741,7 +741,7 @@ private:
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), longEnergyDerivs(NULL), globals(NULL),
hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL),
valueBuffers(NULL), longValueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
}
~OpenCLCalcCustomGBForceKernel();
......@@ -775,6 +775,7 @@ private:
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLParameterSet* energyDerivChain;
OpenCLArray* longEnergyDerivs;
OpenCLArray* globals;
OpenCLArray* valueBuffers;
......
......@@ -19,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-msse2 -DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY")
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
......@@ -107,8 +107,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
if (i != deviceIndex && deviceIndex >= 0 && deviceIndex < (int) devices.size())
continue;
if (platformVendor == "Apple" && devices[i].getInfo<CL_DEVICE_VENDOR>() == "AMD")
continue; // Don't use AMD GPUs on OS X due to serious bugs.
if (platformVendor == "Apple" && (devices[i].getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU || devices[i].getInfo<CL_DEVICE_VENDOR>() == "AMD"))
continue; // The CPU device on OS X won't work correctly, and there are serious bugs using AMD GPUs.
int maxSize = devices[i].getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
int processingElementsPerComputeUnit = 8;
if (devices[i].getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
......@@ -253,8 +253,6 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
if (useDoublePrecision) {
posq = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms, "velm");
......@@ -343,6 +341,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Create utilities objects.
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
integration = new OpenCLIntegrationUtilities(*this, system);
expression = new OpenCLExpressionUtilities(*this);
}
......
......@@ -2654,6 +2654,8 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
......@@ -2804,7 +2806,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
else
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives", true);
energyDerivChain = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
// Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
......@@ -3094,6 +3097,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivChain->getBuffers()[i];
string index = cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict derivChain" << index;
}
if (useLong) {
extraArgs << ", __global const long* restrict derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
......@@ -3145,6 +3153,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
// Record values.
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = cl.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
compute << "forceBuffers[index] = forceBuffers[index]+force;\n";
for (int i = 1; i < force.getNumComputedValues(); i++) {
compute << "real totalDeriv"<<i<<" = dV"<<i<<"dV0";
......@@ -3155,7 +3167,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = cl.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
compute << "derivChain" << index << "[index] = deriv" << index << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
......@@ -3292,9 +3304,9 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++) {
if (needChainForValue[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivChain->getBuffers()[i];
string paramName = prefix+"dEdV"+cl.intToString(i+1);
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
......@@ -3487,6 +3499,8 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
perParticleEnergyKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivChain->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivChain->getBuffers()[i].getMemory());
if (useLong)
perParticleEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer());
if (tabulatedFunctionParams != NULL) {
......
......@@ -143,15 +143,24 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
searchPos = nextPos+1;
}
devices.push_back(deviceIndexProperty.substr(searchPos));
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
try {
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
unsigned int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
}
}
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
}
if (contexts.size() == 0)
contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) {
......
......@@ -75,7 +75,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = select((real) 0, epssig6*(sig6-1), includeInteraction);
real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
......@@ -85,6 +85,7 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
ljEnergy *= switchValue;
}
#endif
ljEnergy = select((real) 0, ljEnergy, includeInteraction);
tempEnergy += ljEnergy;
#endif
#if HAS_COULOMB
......
......@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
......@@ -117,7 +117,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
real dEdR = 0;
......@@ -279,7 +279,7 @@ __kernel void computeN2Energy(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
......@@ -317,7 +317,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
......
......@@ -67,7 +67,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -133,7 +133,7 @@ __kernel void computeN2Energy(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -285,7 +285,7 @@ __kernel void computeN2Energy(
real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......@@ -341,7 +341,7 @@ __kernel void computeN2Energy(
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......
......@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
real tempValue1 = 0;
......@@ -107,7 +107,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
real tempValue1 = 0;
......@@ -252,7 +252,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
real tempValue1 = 0;
......@@ -285,7 +285,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
real tempValue1 = 0;
......
......@@ -59,7 +59,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -116,7 +116,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -251,7 +251,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real r2 = dot(delta.xyz, delta.xyz);
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......@@ -296,7 +296,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......
......@@ -91,7 +91,7 @@ __kernel void computeInteractionGroups(
if (!isExcluded && r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f;
real tempEnergy = 0.0f;
......
......@@ -67,7 +67,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[tbx+j].radius, localData[tbx+j].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
......@@ -114,7 +114,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+tj < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -268,7 +268,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[tbx+tj];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -317,7 +317,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[tbx+tj].radius, localData[tbx+tj].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -437,7 +437,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[tbx+j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -492,7 +492,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -659,7 +659,7 @@ __kernel void computeGBSAForce1(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -707,7 +707,7 @@ __kernel void computeGBSAForce1(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[tbx+tj].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
......@@ -71,7 +71,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if ((j != tgx) && (params1.x < rScaledRadiusJ)) {
......@@ -120,7 +120,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -269,7 +269,7 @@ __kernel void computeBornSum(
int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -331,7 +331,7 @@ __kernel void computeBornSum(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
float2 params2 = (float2) (localData[j].radius, localData[j].scaledRadius);
real rScaledRadiusJ = r+params2.y;
if (params1.x < rScaledRadiusJ) {
......@@ -461,7 +461,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -520,7 +520,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && y*TILE_SIZE+j < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -677,7 +677,7 @@ __kernel void computeGBSAForce1(
int atom2 = atomIndices[j];
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......@@ -737,7 +737,7 @@ __kernel void computeGBSAForce1(
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
real bornRadius2 = localData[j].bornRadius;
real alpha2_ij = bornRadius1*bornRadius2;
real D_ij = r2*RECIP(4.0f*alpha2_ij);
......
......@@ -71,7 +71,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
#ifdef USE_SYMMETRIC
......@@ -128,7 +128,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+tj;
#ifdef USE_SYMMETRIC
......@@ -297,7 +297,7 @@ __kernel void computeNonbonded(
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC
......@@ -347,7 +347,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[tbx+tj];
#ifdef USE_SYMMETRIC
......
......@@ -72,7 +72,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -140,7 +140,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
......@@ -307,7 +307,7 @@ __kernel void computeNonbonded(
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF_SQUARED) {
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......@@ -371,7 +371,7 @@ __kernel void computeNonbonded(
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
real r = r2*invR;
unsigned int atom2 = j;
LOAD_ATOM2_PARAMETERS
atom2 = atomIndices[j];
......
......@@ -25,6 +25,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
IF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
......@@ -39,13 +40,13 @@ FOREACH(TEST_PROG ${TEST_PROGS})
SET(NONBOND_TEST "TestOpenCLNonbondedForce2")
ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG})
SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES COMPILE_FLAGS "-msse2 ${NONBOND_DEFINE_STRING}" )
SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${NONBOND_DEFINE_STRING}" )
ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST})
# OBC
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS "-msse2 ${DEFINE_STRING}" )
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} ${DEFINE_STRING}" )
IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
......@@ -54,8 +55,6 @@ FOREACH(TEST_PROG ${TEST_PROGS})
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION )
ELSE( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS -msse2)
ENDIF( ${TEST_ROOT} STREQUAL "TestOpenCLGBSAOBCForce2" )
ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
IF (OPENMM_BUILD_OPENCL_DOUBLE_PRECISION_TESTS)
......
......@@ -248,7 +248,7 @@ void testMembrane() {
for (int i = 0; i < (int) forces.size(); ++i)
norm += forces[i].dot(forces[i]);
norm = std::sqrt(norm);
const double stepSize = 1e-3;
const double stepSize = 1e-2;
double step = 0.5*stepSize/norm;
vector<Vec3> positions2(numParticles), positions3(numParticles);
for (int i = 0; i < (int) positions.size(); ++i) {
......
......@@ -71,7 +71,7 @@ void testSingleParticle() {
double bornRadius = 0.15-0.009; // dielectric offset
double eps0 = EPSILON0;
double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius;
double extendedRadius = bornRadius+0.14; // probe radius
double extendedRadius = 0.15+0.14; // probe radius
double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
......
......@@ -50,7 +50,7 @@ using namespace std;
static OpenCLPlatform platform;
void testGaussian() {
int numAtoms = 5000;
int numAtoms = 10000;
System system;
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
......@@ -82,10 +82,10 @@ void testGaussian() {
double c2 = var-mean*mean;
double c3 = skew-3*var*mean+2*mean*mean*mean;
double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean;
ASSERT_EQUAL_TOL(0.0, mean, 3.0/sqrt((double)numValues));
ASSERT_EQUAL_TOL(1.0, c2, 3.0/pow(numValues, 1.0/3.0));
ASSERT_EQUAL_TOL(0.0, c3, 3.0/pow(numValues, 1.0/4.0));
ASSERT_EQUAL_TOL(0.0, c4, 3.0/pow(numValues, 1.0/4.0));
ASSERT_EQUAL_TOL(0.0, mean, 4.0/sqrt((double)numValues));
ASSERT_EQUAL_TOL(1.0, c2, 4.0/pow(numValues, 1.0/3.0));
ASSERT_EQUAL_TOL(0.0, c3, 4.0/pow(numValues, 1.0/4.0));
ASSERT_EQUAL_TOL(0.0, c4, 4.0/pow(numValues, 1.0/4.0));
}
void testRandomVelocities() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment