/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include
#include
#include
#include
#ifdef _MSC_VER
#include
#endif
using namespace OpenMM;
using namespace std;
#define CHECK_RESULT(result, prefix) \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
m< 0) {
vector shortName(length);
GetShortPathName(nvcc.c_str(), &shortName[0], length);
nvcc = string(&shortName[0]);
}
setPropertyDefaultValue(CudaCompiler(), nvcc);
setPropertyDefaultValue(CudaTempDirectory(), string(getenv("TEMP")));
#else
char* compiler = getenv("OPENMM_CUDA_COMPILER");
string nvcc = (compiler == NULL ? "/usr/local/cuda/bin/nvcc" : string(compiler));
setPropertyDefaultValue(CudaCompiler(), nvcc);
char* tmpdir = getenv("TMPDIR");
string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif
char* hostCompiler = getenv("CUDA_HOST_COMPILER");
setPropertyDefaultValue(CudaHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
}
double CudaPlatform::getSpeed() const {
return 100;
}
bool CudaPlatform::supportsDoublePrecision() const {
return true;
}
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
const ContextImpl& impl = getContextImpl(context);
const PlatformData* data = reinterpret_cast(impl.getPlatformData());
string propertyName = property;
if (deprecatedPropertyReplacements.find(property) != deprecatedPropertyReplacements.end())
propertyName = deprecatedPropertyReplacements.find(property)->second;
map::const_iterator value = data->propertyValues.find(propertyName);
if (value != data->propertyValues.end())
return value->second;
return Platform::getPropertyValue(context, property);
}
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
}
void CudaPlatform::contextCreated(ContextImpl& context, const map& properties) const {
const string& devicePropValue = (properties.find(CudaDeviceIndex()) == properties.end() ?
getPropertyDefaultValue(CudaDeviceIndex()) : properties.find(CudaDeviceIndex())->second);
string blockingPropValue = (properties.find(CudaUseBlockingSync()) == properties.end() ?
getPropertyDefaultValue(CudaUseBlockingSync()) : properties.find(CudaUseBlockingSync())->second);
string precisionPropValue = (properties.find(CudaPrecision()) == properties.end() ?
getPropertyDefaultValue(CudaPrecision()) : properties.find(CudaPrecision())->second);
string cpuPmePropValue = (properties.find(CudaUseCpuPme()) == properties.end() ?
getPropertyDefaultValue(CudaUseCpuPme()) : properties.find(CudaUseCpuPme())->second);
const string& compilerPropValue = (properties.find(CudaCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
const string& hostCompilerPropValue = (properties.find(CudaHostCompiler()) == properties.end() ?
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
string pmeStreamPropValue = (properties.find(CudaDisablePmeStream()) == properties.end() ?
getPropertyDefaultValue(CudaDisablePmeStream()) : properties.find(CudaDisablePmeStream())->second);
string deterministicForcesValue = (properties.find(CudaDeterministicForces()) == properties.end() ?
getPropertyDefaultValue(CudaDeterministicForces()) : properties.find(CudaDeterministicForces())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
transform(pmeStreamPropValue.begin(), pmeStreamPropValue.end(), pmeStreamPropValue.begin(), ::tolower);
transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
vector pmeKernelName;
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false";
int threads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
char* compilerEnv = getenv("OPENMM_CUDA_COMPILER");
bool allowRuntimeCompiler = (compilerEnv == NULL && properties.find(CudaCompiler()) == properties.end());
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, allowRuntimeCompiler, NULL));
}
void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
Platform& platform = originalContext.getPlatform();
string devicePropValue = platform.getPropertyValue(originalContext.getOwner(), CudaDeviceIndex());
string blockingPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaUseBlockingSync());
string precisionPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaPrecision());
string cpuPmePropValue = platform.getPropertyValue(originalContext.getOwner(), CudaUseCpuPme());
string compilerPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaCompiler());
string tempPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaTempDirectory());
string hostCompilerPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaHostCompiler());
string pmeStreamPropValue = platform.getPropertyValue(originalContext.getOwner(), CudaDisablePmeStream());
string deterministicForcesValue = platform.getPropertyValue(originalContext.getOwner(), CudaDeterministicForces());
int threads = reinterpret_cast(originalContext.getPlatformData())->threads.getNumThreads();
bool allowRuntimeCompiler = reinterpret_cast(originalContext.getPlatformData())->allowRuntimeCompiler;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, allowRuntimeCompiler, &originalContext));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = reinterpret_cast(context.getPlatformData());
delete data;
}
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads, bool allowRuntimeCompiler, ContextImpl* originalContext) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false),
threads(numThreads), allowRuntimeCompiler(allowRuntimeCompiler) {
bool blocking = (blockingProperty == "true");
vector devices;
size_t searchPos = 0, nextPos;
while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
searchPos = nextPos+1;
}
devices.push_back(deviceIndexProperty.substr(searchPos));
PlatformData* originalData = NULL;
if (originalContext != NULL)
originalData = reinterpret_cast(originalContext->getPlatformData());
try {
for (int i = 0; i < (int) devices.size(); i++) {
if (devices[i].length() > 0) {
int deviceIndex;
stringstream(devices[i]) >> deviceIndex;
contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, allowRuntimeCompiler, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
}
}
if (contexts.size() == 0)
contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, allowRuntimeCompiler, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
}
catch (...) {
// If an exception was thrown, do our best to clean up memory.
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
throw;
}
stringstream deviceIndex, deviceName;
for (int i = 0; i < (int) contexts.size(); i++) {
if (i > 0) {
deviceIndex << ',';
deviceName << ',';
}
deviceIndex << contexts[i]->getDeviceIndex();
char name[1000];
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name;
}
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true");
propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so.
peerAccessSupported = true;
for (int i = 1; i < contexts.size(); i++) {
int canAccess;
cuDeviceCanAccessPeer(&canAccess, contexts[i]->getDevice(), contexts[0]->getDevice());
if (!canAccess) {
peerAccessSupported = false;
break;
}
}
}
CudaPlatform::PlatformData::~PlatformData() {
for (int i = 0; i < (int) contexts.size(); i++)
delete contexts[i];
}
void CudaPlatform::PlatformData::initializeContexts(const System& system) {
if (hasInitializedContexts)
return;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->initialize();
hasInitializedContexts = true;
}
void CudaPlatform::PlatformData::syncContexts() {
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->getWorkThread().flush();
}