CudaPlatform.cpp 18.3 KB
Newer Older
1
2
3
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
Evan Pretti's avatar
Evan Pretti committed
4
5
 * This is part of the OpenMM molecular simulation toolkit.                   *
 * See https://openmm.org/development.                                        *
6
 *                                                                            *
7
 * Portions copyright (c) 2008-2026 Stanford University and the Authors.      *
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/Context.h"
#include "openmm/System.h"
32
33
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
34
35
36
#include <algorithm>
#include <cctype>
#include <sstream>
37
#include <cstdio>
38
39
40
#ifdef _MSC_VER
    #include <Windows.h>
#endif
41
42
43
using namespace OpenMM;
using namespace std;

44
45
46
47
48
49
50
/**
 * Evaluate a CUDA driver API expression and throw an OpenMMException if it does
 * not yield CUDA_SUCCESS.
 *
 * The expression is evaluated exactly once and its value cached, so a failing
 * call is not executed again while the error message is being built.  The body
 * is wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); invoke it with a trailing semicolon.
 *
 * @param result  expression producing the driver API status code
 * @param prefix  text streamed at the start of the exception message
 */
#define CHECK_RESULT(result, prefix) \
    do { \
        const auto checkedResult_ = (result); \
        if (checkedResult_ != CUDA_SUCCESS) { \
            std::stringstream m; \
            m<<prefix<<": "<<CudaContext::getErrorString(checkedResult_)<<" ("<<checkedResult_<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
            throw OpenMMException(m.str());\
        } \
    } while (0)

51

52
#ifdef OPENMM_COMMON_BUILDING_STATIC_LIBRARY
53
54
55
56
extern "C" void registerCudaPlatform() {
    Platform::registerPlatform(new CudaPlatform());
}
#else
57
/**
 * Exported registration entry point compiled when
 * OPENMM_COMMON_BUILDING_STATIC_LIBRARY is not defined.  Creates a CudaPlatform
 * and passes it to Platform::registerPlatform().
 */
extern "C" OPENMM_EXPORT_COMMON void registerPlatforms() {
    CudaPlatform* cudaPlatform = new CudaPlatform();
    Platform::registerPlatform(cudaPlatform);
}
60
#endif
61
62

/**
 * Construct the CUDA platform.
 *
 * This records the replacements for deprecated "Cuda"-prefixed property names,
 * registers one CudaKernelFactory for every kernel listed below, declares the
 * platform properties, and assigns each property its default value.
 */
CudaPlatform::CudaPlatform() {
    // Map deprecated property names onto the names returned by the accessors.

    deprecatedPropertyReplacements["CudaDeviceIndex"] = CudaDeviceIndex();
    deprecatedPropertyReplacements["CudaDeviceName"] = CudaDeviceName();
    deprecatedPropertyReplacements["CudaUseBlockingSync"] = CudaUseBlockingSync();
    deprecatedPropertyReplacements["CudaPrecision"] = CudaPrecision();
    deprecatedPropertyReplacements["CudaUseCpuPme"] = CudaUseCpuPme();
    deprecatedPropertyReplacements["CudaTempDirectory"] = CudaTempDirectory();
    deprecatedPropertyReplacements["CudaDisablePmeStream"] = CudaDisablePmeStream();
    deprecatedPropertyReplacements["CudaDeterministicForces"] = CudaDeterministicForces();

    // A single factory instance is registered for every kernel.

    CudaKernelFactory* factory = new CudaKernelFactory();
    registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
    registerKernelFactory(UpdateStateDataKernel::Name(), factory);
    registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
    registerKernelFactory(VirtualSitesKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcConstantPotentialForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCPPForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
    registerKernelFactory(CalcATMForceKernel::Name(), factory);
    registerKernelFactory(CalcOrientationRestraintForceKernel::Name(), factory);
    registerKernelFactory(CalcPythonForceKernel::Name(), factory);
    registerKernelFactory(CalcRGForceKernel::Name(), factory);
    registerKernelFactory(CalcRMSDForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(CalcLCPOForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateNoseHooverStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinMiddleStepKernel::Name(), factory);
    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
    registerKernelFactory(IntegrateDPDStepKernel::Name(), factory);
    registerKernelFactory(IntegrateQTBStepKernel::Name(), factory);
    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);

    // Declare the properties this platform exposes.

    platformProperties.push_back(CudaDeviceIndex());
    platformProperties.push_back(CudaDeviceName());
    platformProperties.push_back(CudaUseBlockingSync());
    platformProperties.push_back(CudaPrecision());
    platformProperties.push_back(CudaUseCpuPme());
    platformProperties.push_back(CudaCompiler());
    platformProperties.push_back(CudaTempDirectory());
    platformProperties.push_back(CudaHostCompiler());
    platformProperties.push_back(CudaDisablePmeStream());
    platformProperties.push_back(CudaDeterministicForces());

    // Assign default values.

    setPropertyDefaultValue(CudaDeviceIndex(), "");
    setPropertyDefaultValue(CudaDeviceName(), "");
    setPropertyDefaultValue(CudaUseBlockingSync(), "false");
    setPropertyDefaultValue(CudaPrecision(), "single");
    setPropertyDefaultValue(CudaUseCpuPme(), "false");
    setPropertyDefaultValue(CudaDisablePmeStream(), "false");
    setPropertyDefaultValue(CudaDeterministicForces(), "false");
    setPropertyDefaultValue(CudaCompiler(), "");
    setPropertyDefaultValue(CudaHostCompiler(), "");

    // The default temporary directory comes from the environment.  getenv()
    // returns NULL for an unset variable, and building a string from NULL is
    // undefined behavior, so every lookup is checked before use.

#ifdef _MSC_VER
    const char* tmpdir = getenv("TEMP");
    if (tmpdir == NULL)
        tmpdir = getenv("TMP");
    string tmp = (tmpdir == NULL ? string(".") : string(tmpdir));
#else
    const char* tmpdir = getenv("TMPDIR");
    string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
#endif
    setPropertyDefaultValue(CudaTempDirectory(), tmp);
}

143
144
145
146
/**
 * Report this platform's speed value.
 *
 * @return the constant 100
 */
double CudaPlatform::getSpeed() const {
    const double speedEstimate = 100;
    return speedEstimate;
}

147
/**
 * Report whether this platform supports double precision.
 *
 * @return always true
 */
bool CudaPlatform::supportsDoublePrecision() const {
    const bool doubleSupported = true;
    return doubleSupported;
}

/**
 * Look up the value of a platform property for a particular Context.
 *
 * A deprecated property name is first translated to its replacement.  If the
 * resulting name has a value stored in the Context's PlatformData, that value
 * is returned; otherwise the request (with the name exactly as passed in) is
 * forwarded to Platform::getPropertyValue().
 *
 * @param context   the Context whose platform data is consulted
 * @param property  the property name, possibly a deprecated one
 * @return a reference to the stored value, or whatever the base class returns
 */
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
    const PlatformData* platformData = reinterpret_cast<const PlatformData*>(getContextImpl(context).getPlatformData());

    // Resolve a deprecated name to its replacement with a single lookup.

    const string* lookupName = &property;
    auto replacement = deprecatedPropertyReplacements.find(property);
    if (replacement != deprecatedPropertyReplacements.end())
        lookupName = &replacement->second;

    map<string, string>::const_iterator stored = platformData->propertyValues.find(*lookupName);
    if (stored == platformData->propertyValues.end())
        return Platform::getPropertyValue(context, property);
    return stored->second;
}

/**
 * Set the value of a platform property for a Context.
 *
 * This implementation has an empty body: the context, property name and value
 * are all ignored and nothing is stored.
 * NOTE(review): presumably intentional (properties fixed at context creation) -- confirm.
 */
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
}

166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/**
 * List the CUDA devices that match an optional set of filters.
 *
 * Only the entries keyed by CudaDeviceIndex() and CudaDeviceName() are examined
 * in the filter map.  An index filter selects a single device by ordinal; a
 * non-empty name filter selects devices whose reported name matches exactly.
 *
 * @param filters  property name/value pairs used to restrict the result
 * @return one map per matching device, holding its index and name; empty if
 *         CUDA could not be initialized or the device count could not be read
 * @throws OpenMMException if querying a device or its name fails
 */
vector<map<string, string> > CudaPlatform::getDevices(const map<string, string>& filters) const {
    vector<map<string, string> > matches;
    try {
        CudaContext::ensureCudaInitialized();
    }
    catch (...) {
        // Without a working CUDA installation there is nothing to report.

        return matches;
    }

    // Extract the filters, if any were given.

    int wantedIndex = -1;
    map<string, string>::const_iterator indexFilter = filters.find(CudaDeviceIndex());
    if (indexFilter != filters.end())
        stringstream(indexFilter->second) >> wantedIndex;
    string wantedName;
    map<string, string>::const_iterator nameFilter = filters.find(CudaDeviceName());
    if (nameFilter != filters.end())
        wantedName = nameFilter->second;

    // Examine each device in turn.

    int deviceCount = 0;
    if (cuDeviceGetCount(&deviceCount) != CUDA_SUCCESS)
        deviceCount = 0;
    const int nameBufferSize = 1000;
    for (int index = 0; index < deviceCount; index++) {
        const bool indexAccepted = (wantedIndex == -1 || wantedIndex == index);
        if (!indexAccepted)
            continue;
        CUdevice device;
        char nameBuffer[nameBufferSize];
        CHECK_RESULT(cuDeviceGet(&device, index), "Error querying device");
        CHECK_RESULT(cuDeviceGetName(nameBuffer, nameBufferSize, device), "Error querying device name");
        const string foundName(nameBuffer);
        if (!wantedName.empty() && wantedName != foundName)
            continue;
        stringstream indexText;
        indexText << index;
        map<string, string> entry;
        entry[CudaDeviceIndex()] = indexText.str();
        entry[CudaDeviceName()] = foundName;
        matches.push_back(entry);
    }
    return matches;
}
208
209
210
211
212
213
214
/**
 * Create and attach the PlatformData for a newly created context.
 *
 * Each property is taken from the supplied map when present, and from the
 * platform default otherwise.  The boolean-like and precision values are
 * lower-cased before use.  If the PME reciprocal kernel is not supported, CPU
 * PME is forced to "false".  The thread count defaults to getNumProcessors()
 * and may be overridden by the OPENMM_CPU_THREADS environment variable.
 *
 * @param context     the context that was just created
 * @param properties  property values specified when the context was created
 */
void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
    // Fetch a property from the caller's map, falling back to the platform default.

    auto lookup = [&](const string& name) -> string {
        map<string, string>::const_iterator entry = properties.find(name);
        if (entry == properties.end())
            return getPropertyDefaultValue(name);
        return entry->second;
    };

    // Lower-case a string in place.  The character is converted to unsigned char
    // first, because passing a negative char to tolower() is undefined behavior.

    auto makeLowerCase = [](string& text) {
        transform(text.begin(), text.end(), text.begin(),
                [](unsigned char c) { return static_cast<char>(::tolower(c)); });
    };

    string devicePropValue = lookup(CudaDeviceIndex());
    string blockingPropValue = lookup(CudaUseBlockingSync());
    string precisionPropValue = lookup(CudaPrecision());
    string cpuPmePropValue = lookup(CudaUseCpuPme());
    string tempPropValue = lookup(CudaTempDirectory());
    string pmeStreamPropValue = lookup(CudaDisablePmeStream());
    string deterministicForcesValue = lookup(CudaDeterministicForces());
    makeLowerCase(blockingPropValue);
    makeLowerCase(precisionPropValue);
    makeLowerCase(cpuPmePropValue);
    makeLowerCase(pmeStreamPropValue);
    makeLowerCase(deterministicForcesValue);

    // CPU PME can only be used if a kernel for it is available.

    vector<string> pmeKernelName(1, CalcPmeReciprocalForceKernel::Name());
    if (!supportsKernels(pmeKernelName))
        cpuPmePropValue = "false";

    // Decide how many CPU threads to use.

    int threads = getNumProcessors();
    char* threadsEnv = getenv("OPENMM_CPU_THREADS");
    if (threadsEnv != NULL)
        stringstream(threadsEnv) >> threads;
    context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
            pmeStreamPropValue, deterministicForcesValue, threads, NULL));
}

/**
 * Create and attach the PlatformData for a context linked to an existing one.
 *
 * Every property value and the thread count are read back from the original
 * context, and the original context is passed on to the new PlatformData.
 *
 * @param context          the newly created linked context
 * @param originalContext  the context it is linked to
 */
void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
    Platform& sourcePlatform = originalContext.getPlatform();
    const Context& owner = originalContext.getOwner();

    // Copy the property values the original context is using.

    const string deviceValue = sourcePlatform.getPropertyValue(owner, CudaDeviceIndex());
    const string blockingValue = sourcePlatform.getPropertyValue(owner, CudaUseBlockingSync());
    const string precisionValue = sourcePlatform.getPropertyValue(owner, CudaPrecision());
    const string cpuPmeValue = sourcePlatform.getPropertyValue(owner, CudaUseCpuPme());
    const string tempValue = sourcePlatform.getPropertyValue(owner, CudaTempDirectory());
    const string pmeStreamValue = sourcePlatform.getPropertyValue(owner, CudaDisablePmeStream());
    const string deterministicValue = sourcePlatform.getPropertyValue(owner, CudaDeterministicForces());

    // Use the same number of threads as the original context.

    PlatformData* sourceData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
    const int numThreads = sourceData->threads.getNumThreads();
    PlatformData* linkedData = new PlatformData(&context, context.getSystem(), deviceValue, blockingValue, precisionValue, cpuPmeValue, tempValue,
            pmeStreamValue, deterministicValue, numThreads, &originalContext);
    context.setPlatformData(linkedData);
}

/**
 * Release the PlatformData attached to a context that is being destroyed.
 *
 * @param context  the context being destroyed
 */
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
    delete reinterpret_cast<PlatformData*>(context.getPlatformData());
}

259
/**
 * Create the per-context platform data, including one CudaContext per requested device.
 *
 * @param context                      the ContextImpl this data belongs to
 * @param system                       the System passed on to each CudaContext
 * @param deviceIndexProperty          device indices separated by commas and/or spaces; if it
 *                                     names no device, one CudaContext is created with index -1
 * @param blockingProperty             "true" to request blocking synchronization
 * @param precisionProperty            precision setting passed on to each CudaContext
 * @param cpuPmeProperty               "true" to request CPU PME (ignored in double precision)
 * @param tempProperty                 temporary directory passed on to each CudaContext
 * @param pmeStreamProperty            "true" to disable the PME stream
 * @param deterministicForcesProperty  "true" to request deterministic forces
 * @param numThreads                   number of threads used to construct the thread pool member
 * @param originalContext              the context this one is linked to, or NULL
 * @throws OpenMMException if a device name cannot be queried; any exception thrown while
 *         creating a CudaContext is also propagated
 */
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
            const string& cpuPmeProperty, const string& tempProperty, const string& pmeStreamProperty, const string& deterministicForcesProperty,
            int numThreads, ContextImpl* originalContext) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0),
                hasInitializedContexts(false), threads(numThreads) {
    bool blocking = (blockingProperty == "true");

    // Split the device list on commas and spaces.  Adjacent separators produce
    // empty entries, which are skipped below.

    vector<string> devices;
    size_t searchPos = 0, nextPos;
    while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
        devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));
    PlatformData* originalData = NULL;
    if (originalContext != NULL)
        originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());

    // The destructor is not run if this constructor throws, so everything that
    // can throw after the first CudaContext exists is inside the try block.

    try {
        for (size_t i = 0; i < devices.size(); i++) {
            if (devices[i].empty())
                continue;

            // Start from -1 so the index is never read uninitialized if nothing is extracted.

            int requestedIndex = -1;
            stringstream(devices[i]) >> requestedIndex;

            // Pair with the original context at the same position among the
            // contexts actually created, not the position in the raw token list.

            CudaContext* linked = (originalData == NULL ? NULL : originalData->contexts[contexts.size()]);
            contexts.push_back(new CudaContext(system, requestedIndex, blocking, precisionProperty, tempProperty, *this, linked));
        }
        if (contexts.size() == 0)
            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));

        // Build comma separated lists of the devices that are actually in use.

        stringstream deviceIndex, deviceName;
        for (size_t i = 0; i < contexts.size(); i++) {
            if (i > 0) {
                deviceIndex << ',';
                deviceName << ',';
            }
            deviceIndex << contexts[i]->getDeviceIndex();
            char name[1000];
            CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
            deviceName << name;
        }
        useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
        disablePmeStream = (pmeStreamProperty == "true");
        deterministicForces = (deterministicForcesProperty == "true");

        // Record the values that getPropertyValue() will report.

        propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
        propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
        propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
        propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
        propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
        propertyValues[CudaPlatform::CudaCompiler()] = "";
        propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
        propertyValues[CudaPlatform::CudaHostCompiler()] = "";
        propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
        propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
        contextEnergy.resize(contexts.size());

        // Determine whether every additional device can access the first device
        // as a peer.  A failed query is treated the same as "cannot access".

        peerAccessSupported = true;
        for (size_t i = 1; i < contexts.size(); i++) {
            int canAccess = 0;
            if (cuDeviceCanAccessPeer(&canAccess, contexts[i]->getDevice(), contexts[0]->getDevice()) != CUDA_SUCCESS || !canAccess) {
                peerAccessSupported = false;
                break;
            }
        }
    }
    catch (...) {
        // If an exception was thrown, do our best to clean up memory.

        for (size_t i = 0; i < contexts.size(); i++)
            delete contexts[i];
        throw;
    }
}

/**
 * Delete every context held in the contexts list.
 */
CudaPlatform::PlatformData::~PlatformData() {
    for (auto cu : contexts)
        delete cu;
}

void CudaPlatform::PlatformData::initializeContexts(const System& system) {
337
338
    if (hasInitializedContexts)
        return;
339
340
    for (int i = 0; i < (int) contexts.size(); i++)
        contexts[i]->initialize();
341
    hasInitializedContexts = true;
342
343
344
}

/**
 * Flush the work thread of every context, in order.
 */
void CudaPlatform::PlatformData::syncContexts() {
    for (auto cu : contexts)
        cu->getWorkThread().flush();
}