CudaPlatform.cpp 18.4 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008-2021 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/Context.h"
#include "openmm/System.h"
34
35
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
36
37
38
#include <algorithm>
#include <cctype>
#include <sstream>
39
#include <cstdio>
40
41
42
#ifdef _MSC_VER
    #include <Windows.h>
#endif
43
44
45
using namespace OpenMM;
using namespace std;

46
47
48
49
50
51
52
/**
 * Throw an OpenMMException if a CUDA driver API call did not return CUDA_SUCCESS.
 *
 * The exception message is "<prefix>: <error string> (<code>) at <file>:<line>".
 *
 * The result expression is evaluated exactly once and stored in a local, so it
 * is safe to pass the API call itself as the argument.  The body is wrapped in
 * do { } while (0) so the macro behaves like a single statement (it must be
 * followed by a semicolon and cannot capture a following "else").
 */
#define CHECK_RESULT(result, prefix) \
    do { \
        CUresult checkedResult_ = (result); \
        if (checkedResult_ != CUDA_SUCCESS) { \
            std::stringstream m; \
            m<<prefix<<": "<<CudaContext::getErrorString(checkedResult_)<<" ("<<checkedResult_<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
            throw OpenMMException(m.str());\
        } \
    } while (0)

53

54
#ifdef OPENMM_COMMON_BUILDING_STATIC_LIBRARY
55
56
57
58
extern "C" void registerCudaPlatform() {
    Platform::registerPlatform(new CudaPlatform());
}
#else
59
extern "C" OPENMM_EXPORT_COMMON void registerPlatforms() {
60
61
    Platform::registerPlatform(new CudaPlatform());
}
62
#endif
63
64

/**
 * Construct the platform: record the deprecated property aliases, register one
 * shared CudaKernelFactory for every supported kernel, publish the list of
 * platform properties, and set their default values.
 *
 * Defaults for the compiler, temporary directory and host compiler are taken
 * from environment variables when they are set.
 */
CudaPlatform::CudaPlatform() {
    // Older "Cuda"-prefixed property names, each mapped to its current name.
    deprecatedPropertyReplacements["CudaDeviceIndex"] = CudaDeviceIndex();
    deprecatedPropertyReplacements["CudaDeviceName"] = CudaDeviceName();
    deprecatedPropertyReplacements["CudaUseBlockingSync"] = CudaUseBlockingSync();
    deprecatedPropertyReplacements["CudaPrecision"] = CudaPrecision();
    deprecatedPropertyReplacements["CudaUseCpuPme"] = CudaUseCpuPme();
    deprecatedPropertyReplacements["CudaTempDirectory"] = CudaTempDirectory();
    deprecatedPropertyReplacements["CudaDisablePmeStream"] = CudaDisablePmeStream();
    deprecatedPropertyReplacements["CudaDeterministicForces"] = CudaDeterministicForces();

    // A single factory instance serves every kernel name.
    CudaKernelFactory* factory = new CudaKernelFactory();
    registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
    registerKernelFactory(UpdateStateDataKernel::Name(), factory);
    registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
    registerKernelFactory(VirtualSitesKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
    registerKernelFactory(CalcRMSDForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateNoseHooverStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinMiddleStepKernel::Name(), factory);
    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);

    // Properties exposed by this platform (order is preserved in the list).
    platformProperties.push_back(CudaDeviceIndex());
    platformProperties.push_back(CudaDeviceName());
    platformProperties.push_back(CudaUseBlockingSync());
    platformProperties.push_back(CudaPrecision());
    platformProperties.push_back(CudaUseCpuPme());
    platformProperties.push_back(CudaCompiler());
    platformProperties.push_back(CudaTempDirectory());
    platformProperties.push_back(CudaHostCompiler());
    platformProperties.push_back(CudaDisablePmeStream());
    platformProperties.push_back(CudaDeterministicForces());

    // Environment-independent defaults.
    setPropertyDefaultValue(CudaDeviceIndex(), "");
    setPropertyDefaultValue(CudaDeviceName(), "");
    setPropertyDefaultValue(CudaUseBlockingSync(), "false");
    setPropertyDefaultValue(CudaPrecision(), "single");
    setPropertyDefaultValue(CudaUseCpuPme(), "false");
    setPropertyDefaultValue(CudaDisablePmeStream(), "false");
    setPropertyDefaultValue(CudaDeterministicForces(), "false");
#ifdef _MSC_VER
    // Default compiler: nvcc.exe inside CUDA_BIN_PATH when that variable is set.
    char* bindir = getenv("CUDA_BIN_PATH");
    string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
    // Convert to the short (8.3) form when Windows can provide one.  The first
    // call only asks for the required buffer size.
    int length = GetShortPathName(nvcc.c_str(), NULL, 0);
    if (length > 0) {
        vector<char> shortName(length);
        // Only replace the path if the conversion itself succeeded; otherwise
        // the zero-filled buffer would turn the compiler path into "".
        if (GetShortPathName(nvcc.c_str(), &shortName[0], length) > 0)
            nvcc = string(&shortName[0]);
    }
    setPropertyDefaultValue(CudaCompiler(), nvcc);
    // getenv() returns NULL for an unset variable, and building a std::string
    // from NULL is undefined behaviour, so fall back to TMP and then to ".".
    const char* tempDir = getenv("TEMP");
    if (tempDir == NULL)
        tempDir = getenv("TMP");
    setPropertyDefaultValue(CudaTempDirectory(), string(tempDir == NULL ? "." : tempDir));
#else
    // Default compiler: OPENMM_CUDA_COMPILER if set, else the standard install path.
    char* compiler = getenv("OPENMM_CUDA_COMPILER");
    string nvcc = (compiler == NULL ? "/usr/local/cuda/bin/nvcc" : string(compiler));
    setPropertyDefaultValue(CudaCompiler(), nvcc);
    // Default temporary directory: TMPDIR if set, else the C library's P_tmpdir.
    char* tmpdir = getenv("TMPDIR");
    string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
    setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif
    // Default host compiler: CUDA_HOST_COMPILER if set, else empty.
    char* hostCompiler = getenv("CUDA_HOST_COMPILER");
    setPropertyDefaultValue(CudaHostCompiler(), (hostCompiler == NULL ? "" : string(hostCompiler)));
}

149
150
151
152
/**
 * Report this platform's speed score.
 *
 * @return the fixed value 100
 */
double CudaPlatform::getSpeed() const {
    const double speedScore = 100;
    return speedScore;
}

153
/**
 * Report whether this platform supports double precision.
 *
 * @return always true
 */
bool CudaPlatform::supportsDoublePrecision() const {
    const bool doublePrecisionAvailable = true;
    return doublePrecisionAvailable;
}

/**
 * Look up the value of a platform property for a particular Context.
 *
 * Deprecated property names are first translated to their replacements.  If
 * the Context's PlatformData holds a value for the (translated) name it is
 * returned; otherwise the request is forwarded, with the name exactly as the
 * caller gave it, to Platform::getPropertyValue().
 *
 * @param context   the Context whose property is requested
 * @param property  the property name (current or deprecated)
 * @return a reference to the stored property value
 */
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
    const ContextImpl& impl = getContextImpl(context);
    const PlatformData* data = reinterpret_cast<const PlatformData*>(impl.getPlatformData());

    // Translate a deprecated name, if this is one, using a single map lookup.
    map<string, string>::const_iterator replacement = deprecatedPropertyReplacements.find(property);
    const string& propertyName = (replacement == deprecatedPropertyReplacements.end() ? property : replacement->second);

    map<string, string>::const_iterator stored = data->propertyValues.find(propertyName);
    if (stored == data->propertyValues.end())
        return Platform::getPropertyValue(context, property);
    return stored->second;
}

/**
 * Empty implementation: the body contains no statements, so the requested
 * property/value pair is ignored and nothing on the Context is changed.
 *
 * @param context   unused
 * @param property  unused
 * @param value     unused
 */
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
}

void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
    const string& devicePropValue = (properties.find(CudaDeviceIndex()) == properties.end() ?
            getPropertyDefaultValue(CudaDeviceIndex()) : properties.find(CudaDeviceIndex())->second);
    string blockingPropValue = (properties.find(CudaUseBlockingSync()) == properties.end() ?
            getPropertyDefaultValue(CudaUseBlockingSync()) : properties.find(CudaUseBlockingSync())->second);
    string precisionPropValue = (properties.find(CudaPrecision()) == properties.end() ?
            getPropertyDefaultValue(CudaPrecision()) : properties.find(CudaPrecision())->second);
179
180
    string cpuPmePropValue = (properties.find(CudaUseCpuPme()) == properties.end() ?
            getPropertyDefaultValue(CudaUseCpuPme()) : properties.find(CudaUseCpuPme())->second);
181
182
183
184
    const string& compilerPropValue = (properties.find(CudaCompiler()) == properties.end() ?
            getPropertyDefaultValue(CudaCompiler()) : properties.find(CudaCompiler())->second);
    const string& tempPropValue = (properties.find(CudaTempDirectory()) == properties.end() ?
            getPropertyDefaultValue(CudaTempDirectory()) : properties.find(CudaTempDirectory())->second);
185
186
    const string& hostCompilerPropValue = (properties.find(CudaHostCompiler()) == properties.end() ?
            getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
187
188
    string pmeStreamPropValue = (properties.find(CudaDisablePmeStream()) == properties.end() ?
            getPropertyDefaultValue(CudaDisablePmeStream()) : properties.find(CudaDisablePmeStream())->second);
189
190
    string deterministicForcesValue = (properties.find(CudaDeterministicForces()) == properties.end() ?
            getPropertyDefaultValue(CudaDeterministicForces()) : properties.find(CudaDeterministicForces())->second);
191
192
    transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
    transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
193
    transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
194
    transform(pmeStreamPropValue.begin(), pmeStreamPropValue.end(), pmeStreamPropValue.begin(), ::tolower);
195
    transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
196
197
198
199
    vector<string> pmeKernelName;
    pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
    if (!supportsKernels(pmeKernelName))
        cpuPmePropValue = "false";
200
201
202
203
    int threads = getNumProcessors();
    char* threadsEnv = getenv("OPENMM_CPU_THREADS");
    if (threadsEnv != NULL)
        stringstream(threadsEnv) >> threads;
204
205
    char* compilerEnv = getenv("OPENMM_CUDA_COMPILER");
    bool allowRuntimeCompiler = (compilerEnv == NULL && properties.find(CudaCompiler()) == properties.end());
206
    context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
207
            hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, allowRuntimeCompiler, NULL));
208
209
210
211
212
213
214
215
216
217
218
219
220
221
}

/**
 * Called when a Context is created that is linked to an existing one.  Every
 * property value, the thread count and the runtime-compiler flag are copied
 * from the original context, and a newly allocated PlatformData that refers
 * back to the original context is attached to the new one.
 *
 * @param context          the newly created ContextImpl
 * @param originalContext  the ContextImpl it is linked to
 */
void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
    Platform& platform = originalContext.getPlatform();
    const Context& owner = originalContext.getOwner();

    // Read each property as it is currently reported for the original context.
    string devicePropValue = platform.getPropertyValue(owner, CudaDeviceIndex());
    string blockingPropValue = platform.getPropertyValue(owner, CudaUseBlockingSync());
    string precisionPropValue = platform.getPropertyValue(owner, CudaPrecision());
    string cpuPmePropValue = platform.getPropertyValue(owner, CudaUseCpuPme());
    string compilerPropValue = platform.getPropertyValue(owner, CudaCompiler());
    string tempPropValue = platform.getPropertyValue(owner, CudaTempDirectory());
    string hostCompilerPropValue = platform.getPropertyValue(owner, CudaHostCompiler());
    string pmeStreamPropValue = platform.getPropertyValue(owner, CudaDisablePmeStream());
    string deterministicForcesValue = platform.getPropertyValue(owner, CudaDeterministicForces());

    // Settings that are not properties come straight from the original PlatformData.
    PlatformData* originalData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
    int threads = originalData->threads.getNumThreads();
    bool allowRuntimeCompiler = originalData->allowRuntimeCompiler;

    PlatformData* linkedData = new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
            hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, allowRuntimeCompiler, &originalContext);
    context.setPlatformData(linkedData);
}

/**
 * Called when a Context is destroyed: deletes the PlatformData attached to it.
 *
 * @param context  the ContextImpl being destroyed
 */
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
    delete reinterpret_cast<PlatformData*>(context.getPlatformData());
}

232
/**
 * Create the per-Context platform data: one CudaContext for every device named
 * in deviceIndexProperty (or a single CudaContext with device index -1 if none
 * is named), plus the resolved property values reported back to the user.
 *
 * @param context                      the ContextImpl this data belongs to
 * @param system                       the System passed to each CudaContext
 * @param deviceIndexProperty          device indices separated by commas and/or spaces
 * @param blockingProperty             "true" to request blocking sync
 * @param precisionProperty            precision mode passed to each CudaContext
 * @param cpuPmeProperty               "true" to request CPU PME (ignored in double precision)
 * @param compilerProperty             compiler setting passed to each CudaContext
 * @param tempProperty                 temporary directory passed to each CudaContext
 * @param hostCompilerProperty         host compiler setting passed to each CudaContext
 * @param pmeStreamProperty            "true" to disable the PME stream
 * @param deterministicForcesProperty  "true" to request deterministic forces
 * @param numThreads                   thread count used to construct the thread pool member
 * @param allowRuntimeCompiler         runtime-compiler flag passed to each CudaContext
 * @param originalContext              the context this one is linked to, or NULL
 * @throws OpenMMException if a device name cannot be queried; any exception
 *         raised while creating a CudaContext is rethrown after cleanup
 */
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
            const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
            const string& deterministicForcesProperty, int numThreads, bool allowRuntimeCompiler, ContextImpl* originalContext) :
                context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false),
                threads(numThreads), allowRuntimeCompiler(allowRuntimeCompiler) {
    bool blocking = (blockingProperty == "true");

    // Split the device list on commas and spaces.  Adjacent separators yield
    // empty tokens, which are skipped below.
    vector<string> devices;
    size_t searchPos = 0, nextPos;
    while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
        devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));

    PlatformData* originalData = NULL;
    if (originalContext != NULL)
        originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());

    stringstream indexList, nameList;
    try {
        for (int i = 0; i < (int) devices.size(); i++) {
            if (devices[i].length() > 0) {
                int requestedIndex;
                stringstream(devices[i]) >> requestedIndex;
                // Pair the new context with the original context in the same
                // position.  Indexing by the number of contexts created so far
                // (not by token position) stays correct if empty tokens were skipped.
                CudaContext* linked = (originalData == NULL ? NULL : originalData->contexts[contexts.size()]);
                contexts.push_back(new CudaContext(system, requestedIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, allowRuntimeCompiler, *this, linked));
            }
        }
        // No device was named: create a single context with device index -1.
        if (contexts.size() == 0)
            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, allowRuntimeCompiler, *this, (originalData == NULL ? NULL : originalData->contexts[0])));

        // Build comma-separated lists of the device indices and names in use.
        // This is inside the try block so that a failed name query also
        // releases the contexts created above.
        for (int i = 0; i < (int) contexts.size(); i++) {
            if (i > 0) {
                indexList << ',';
                nameList << ',';
            }
            indexList << contexts[i]->getDeviceIndex();
            char name[1000];
            CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
            nameList << name;
        }
    }
    catch (...) {
        // If an exception was thrown, do our best to clean up memory.  The
        // destructor does not run for a partially constructed object.

        for (int i = 0; i < (int) contexts.size(); i++)
            delete contexts[i];
        throw;
    }

    // CPU PME is only used when the first context is not in double precision.
    useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
    disablePmeStream = (pmeStreamProperty == "true");
    deterministicForces = (deterministicForcesProperty == "true");

    // Record the resolved values so they can be reported through getPropertyValue().
    propertyValues[CudaPlatform::CudaDeviceIndex()] = indexList.str();
    propertyValues[CudaPlatform::CudaDeviceName()] = nameList.str();
    propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
    propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
    propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
    propertyValues[CudaPlatform::CudaCompiler()] = compilerProperty;
    propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
    propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
    propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
    propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
    contextEnergy.resize(contexts.size());

    // Determine whether peer-to-peer copying is supported: every additional
    // device must be able to access the first device.  A failed query is
    // treated the same as "cannot access".

    peerAccessSupported = true;
    for (int i = 1; i < (int) contexts.size(); i++) {
        int canAccess = 0;
        if (cuDeviceCanAccessPeer(&canAccess, contexts[i]->getDevice(), contexts[0]->getDevice()) != CUDA_SUCCESS || !canAccess) {
            peerAccessSupported = false;
            break;
        }
    }
}

/**
 * Delete every CudaContext held in the contexts list.
 */
CudaPlatform::PlatformData::~PlatformData() {
    for (size_t index = 0; index < contexts.size(); ++index)
        delete contexts[index];
}

void CudaPlatform::PlatformData::initializeContexts(const System& system) {
311
312
    if (hasInitializedContexts)
        return;
313
314
    for (int i = 0; i < (int) contexts.size(); i++)
        contexts[i]->initialize();
315
    hasInitializedContexts = true;
316
317
318
}

/**
 * Call flush() on the work thread of every CudaContext, in list order.
 */
void CudaPlatform::PlatformData::syncContexts() {
    for (size_t index = 0; index < contexts.size(); ++index)
        contexts[index]->getWorkThread().flush();
}