Commit cddeb60f authored by Peter Eastman's avatar Peter Eastman
Browse files

Added property to request deterministic forces

parent 375a7081
......@@ -1925,7 +1925,15 @@ The CUDA Platform recognizes the following Platform-specific properties:
computation, allowing the CPU to do other work. If it is set to “false”, CUDA
will spin-lock while the GPU is working. Setting it to "false" can improve performance slightly,
but also prevents the CPU from doing anything else while the GPU is working.
* CudaDeterministicForces: In some cases, the CUDA platform may compute forces
in ways that are not fully deterministic (typically differing in what order a
set of numbers get added together). This means that if you compute the forces
twice for the same particle positions, there may be tiny differences in the
results. In most cases this is not a problem, but certain algorithms depend
on forces being exactly reproducible to the last bit. If you set this
property to "true", it will instead do these calculations in a way that
produces fully deterministic results, at the cost of a small decrease in
performance.
The CUDA Platform also supports parallelizing a simulation across multiple GPUs.
To do that, set the CudaDeviceIndex property to a comma separated list of
......
......@@ -117,20 +117,27 @@ public:
static const std::string key = "CudaDisablePmeStream";
return key;
}
/**
* This is the name of the parameter for requesting that force computations be fully deterministic.
*/
static const std::string& CudaDeterministicForces() {
static const std::string key = "CudaDeterministicForces";
return key;
}
};
class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty,
const std::string& pmeStreamProperty, int numThreads);
const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
ContextImpl* context;
std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency;
int stepCount, computeForceCount;
double time;
......
......@@ -1693,6 +1693,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["USE_DOUBLE_PRECISION"] = "1";
if (usePmeStream)
pmeDefines["USE_PME_STREAM"] = "1";
if (cu.getPlatformData().deterministicForces)
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
if (cu.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
......@@ -1920,7 +1922,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0) {
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, directPmeGrid->getSize());
}
......
......@@ -102,12 +102,14 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaTempDirectory());
platformProperties.push_back(CudaHostCompiler());
platformProperties.push_back(CudaDisablePmeStream());
platformProperties.push_back(CudaDeterministicForces());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
setPropertyDefaultValue(CudaPrecision(), "single");
setPropertyDefaultValue(CudaUseCpuPme(), "false");
setPropertyDefaultValue(CudaDisablePmeStream(), "false");
setPropertyDefaultValue(CudaDeterministicForces(), "false");
#ifdef _MSC_VER
char* bindir = getenv("CUDA_BIN_PATH");
string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
......@@ -168,10 +170,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
string pmeStreamPropValue = (properties.find(CudaDisablePmeStream()) == properties.end() ?
getPropertyDefaultValue(CudaDisablePmeStream()) : properties.find(CudaDisablePmeStream())->second);
string deterministicForcesValue = (properties.find(CudaDeterministicForces()) == properties.end() ?
getPropertyDefaultValue(CudaDeterministicForces()) : properties.find(CudaDeterministicForces())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
transform(pmeStreamPropValue.begin(), pmeStreamPropValue.end(), pmeStreamPropValue.begin(), ::tolower);
transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
vector<string> pmeKernelName;
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
......@@ -180,7 +185,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue, pmeStreamPropValue, threads));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -189,7 +195,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty, int numThreads) :
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
......@@ -230,6 +237,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true");
propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
......@@ -239,6 +247,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so.
......
......@@ -83,7 +83,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200 || defined(USE_DETERMINISTIC_FORCES)
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
int gridIndex = index;
gridIndex = (gridIndex%2 == 0 ? gridIndex/2 : (gridIndex+GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z)/2);
......
......@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
CudaContext& context = *platformData.contexts[0];
context.initialize();
OpenMM_SFMT::SFMT sfmt;
......
......@@ -118,9 +118,44 @@ void testReordering() {
}
}
void testDeterministicForces() {
// Check that the CudaDeterministicForces property works correctly.
const int numParticles = 1000;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(2.1, 6, 0), Vec3(-1.5, -0.5, 6));
NonbondedForce *nonbonded = new NonbondedForce();
nonbonded->setNonbondedMethod(NonbondedForce::PME);
system.addForce(nonbonded);
vector<Vec3> positions;
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(i%2 == 0 ? 1 : -1, 1, 0);
positions.push_back(Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5)*6);
}
VerletIntegrator integrator(0.001);
map<string, string> properties;
properties[CudaPlatform::CudaDeterministicForces()] = "true";
Context context(system, integrator, platform, properties);
context.setPositions(positions);
State state1 = context.getState(State::Forces);
State state2 = context.getState(State::Forces);
// All forces should be *exactly* equal.
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL(state1.getForces()[i][0], state2.getForces()[i][0]);
ASSERT_EQUAL(state1.getForces()[i][1], state2.getForces()[i][1]);
ASSERT_EQUAL(state1.getForces()[i][2], state2.getForces()[i][2]);
}
}
void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME);
testReordering();
testDeterministicForces();
}
......@@ -56,7 +56,7 @@ void testGaussian() {
system.addParticle(1.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
CudaContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
system.addParticle(0.0);
CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), 1);
platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
CudaContext& context = *platformData.contexts[0];
context.initialize();
CudaArray data(context, array.size(), 4, "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment