/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit.                   *
 * See https://openmm.org/development.                                        *
 *                                                                            *
 * Portions copyright (c) 2008-2026 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "CudaContext.h"
#include "CudaExpressionUtilities.h"
#include "CudaPlatform.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
#include "openmm/Context.h"
#include "openmm/System.h"
32
33
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
34
35
36
#include <algorithm>
#include <cctype>
#include <sstream>
37
#include <cstdio>
38
39
40
#ifdef _MSC_VER
    #include <Windows.h>
#endif
41
42
43
using namespace OpenMM;
using namespace std;

44
45
46
47
48
49
50
// Checks the status returned by a CUDA driver API call and throws an
// OpenMMException if it is not CUDA_SUCCESS.  The exception message consists of
// the supplied prefix, the error string reported by CudaContext::getErrorString(),
// the numeric status code, and the file and line of the call site.
//
// The "result" expression is evaluated exactly once and cached: it is normally a
// driver call, which must not be executed again while the message is being built.
// The do/while(0) wrapper makes the macro behave as a single statement, so it is
// safe inside unbraced if/else; invoke it with a trailing semicolon.
#define CHECK_RESULT(result, prefix) \
    do { \
        const auto checkedResult_ = (result); \
        if (checkedResult_ != CUDA_SUCCESS) { \
            std::stringstream m; \
            m<<prefix<<": "<<CudaContext::getErrorString(checkedResult_)<<" ("<<checkedResult_<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
            throw OpenMMException(m.str());\
        } \
    } while (0)

51

52
#ifdef OPENMM_COMMON_BUILDING_STATIC_LIBRARY
53
54
55
56
extern "C" void registerCudaPlatform() {
    Platform::registerPlatform(new CudaPlatform());
}
#else
57
extern "C" OPENMM_EXPORT_COMMON void registerPlatforms() {
58
59
    Platform::registerPlatform(new CudaPlatform());
}
60
#endif
61
62

/**
 * Construct the CUDA platform.
 *
 * The constructor performs four steps, in this order:
 *  1. maps each deprecated "Cuda..." property name to its current name,
 *  2. registers a single CudaKernelFactory for every kernel name listed below,
 *  3. declares the platform's properties,
 *  4. assigns a default value to each property.
 *
 * The default temporary directory is read from the environment.  getenv()
 * returns NULL for an unset variable, so every lookup is checked before a
 * std::string is built from it.
 */
CudaPlatform::CudaPlatform() {
    // Deprecated property names and the names that replace them.

    deprecatedPropertyReplacements["CudaDeviceIndex"] = CudaDeviceIndex();
    deprecatedPropertyReplacements["CudaDeviceName"] = CudaDeviceName();
    deprecatedPropertyReplacements["CudaUseBlockingSync"] = CudaUseBlockingSync();
    deprecatedPropertyReplacements["CudaPrecision"] = CudaPrecision();
    deprecatedPropertyReplacements["CudaUseCpuPme"] = CudaUseCpuPme();
    deprecatedPropertyReplacements["CudaTempDirectory"] = CudaTempDirectory();
    deprecatedPropertyReplacements["CudaDisablePmeStream"] = CudaDisablePmeStream();
    deprecatedPropertyReplacements["CudaDeterministicForces"] = CudaDeterministicForces();

    // One factory instance is shared by all of the kernels registered below.

    CudaKernelFactory* factory = new CudaKernelFactory();
    registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
    registerKernelFactory(UpdateStateDataKernel::Name(), factory);
    registerKernelFactory(ApplyConstraintsKernel::Name(), factory);
    registerKernelFactory(VirtualSitesKernel::Name(), factory);
    registerKernelFactory(MinimizeKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomBondForceKernel::Name(), factory);
    registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCMAPTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcConstantPotentialForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCPPForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
    registerKernelFactory(CalcATMForceKernel::Name(), factory);
    registerKernelFactory(CalcOrientationRestraintForceKernel::Name(), factory);
    registerKernelFactory(CalcPythonForceKernel::Name(), factory);
    registerKernelFactory(CalcRGForceKernel::Name(), factory);
    registerKernelFactory(CalcRMSDForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(CalcLCPOForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateNoseHooverStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinMiddleStepKernel::Name(), factory);
    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
    registerKernelFactory(IntegrateCustomStepKernel::Name(), factory);
    registerKernelFactory(IntegrateDPDStepKernel::Name(), factory);
    registerKernelFactory(IntegrateQTBStepKernel::Name(), factory);
    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(ApplyMonteCarloBarostatKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);

    // Properties exposed by this platform.

    platformProperties.push_back(CudaDeviceIndex());
    platformProperties.push_back(CudaDeviceName());
    platformProperties.push_back(CudaUseBlockingSync());
    platformProperties.push_back(CudaPrecision());
    platformProperties.push_back(CudaUseCpuPme());
    platformProperties.push_back(CudaCompiler());
    platformProperties.push_back(CudaTempDirectory());
    platformProperties.push_back(CudaHostCompiler());
    platformProperties.push_back(CudaDisablePmeStream());
    platformProperties.push_back(CudaDeterministicForces());

    // Default values for those properties.

    setPropertyDefaultValue(CudaDeviceIndex(), "");
    setPropertyDefaultValue(CudaDeviceName(), "");
    setPropertyDefaultValue(CudaUseBlockingSync(), "false");
    setPropertyDefaultValue(CudaPrecision(), "single");
    setPropertyDefaultValue(CudaUseCpuPme(), "false");
    setPropertyDefaultValue(CudaDisablePmeStream(), "false");
    setPropertyDefaultValue(CudaDeterministicForces(), "false");
    setPropertyDefaultValue(CudaCompiler(), "");
    setPropertyDefaultValue(CudaHostCompiler(), "");
#ifdef _MSC_VER
    // Use TEMP, then TMP, and finally the current directory if neither is set.
    // Constructing a std::string directly from a NULL getenv() result would be
    // undefined behavior.
    const char* tmpdir = getenv("TEMP");
    if (tmpdir == NULL)
        tmpdir = getenv("TMP");
    setPropertyDefaultValue(CudaTempDirectory(), string(tmpdir == NULL ? "." : tmpdir));
#else
    // Use TMPDIR when it is set, otherwise the P_tmpdir constant from <cstdio>.
    const char* tmpdir = getenv("TMPDIR");
    string tmp = (tmpdir == NULL ? string(P_tmpdir) : string(tmpdir));
    setPropertyDefaultValue(CudaTempDirectory(), tmp);
#endif
}

144
145
146
147
/**
 * Report this platform's speed value, which is the fixed constant 100.
 * NOTE(review): presumably a relative score used to rank platforms against
 * each other -- confirm against the Platform base class documentation.
 */
double CudaPlatform::getSpeed() const {
    const double speedScore = 100;
    return speedScore;
}

148
/**
 * Report whether this platform supports double precision.  The answer is
 * unconditionally true.
 */
bool CudaPlatform::supportsDoublePrecision() const {
    const bool doublePrecisionAvailable = true;
    return doublePrecisionAvailable;
}

/**
 * Look up the value of a platform property for a particular Context.
 *
 * A deprecated property name is first translated to its replacement.  The
 * translated name is then looked up in the property values stored in the
 * context's PlatformData.  If no value is stored there, the request is passed
 * to Platform::getPropertyValue() using the name exactly as the caller gave it.
 *
 * @param context   the Context whose property value is requested
 * @param property  the property name (current or deprecated)
 * @return a reference to the stored value, or the base class result
 */
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
    const ContextImpl& contextImpl = getContextImpl(context);
    const PlatformData* platformData = reinterpret_cast<const PlatformData*>(contextImpl.getPlatformData());

    // Translate a deprecated name with a single lookup.
    auto replacement = deprecatedPropertyReplacements.find(property);
    string currentName = property;
    if (replacement != deprecatedPropertyReplacements.end())
        currentName = replacement->second;

    map<string, string>::const_iterator stored = platformData->propertyValues.find(currentName);
    if (stored == platformData->propertyValues.end())
        return Platform::getPropertyValue(context, property);
    return stored->second;
}

/**
 * Set the value of a platform property for a particular Context.
 *
 * This implementation has an empty body: the call is accepted and ignored, and
 * none of the arguments are read or modified.
 *
 * @param context   the Context the request refers to (unused)
 * @param property  the property name (unused)
 * @param value     the requested value (unused)
 */
void CudaPlatform::setPropertyValue(Context& context, const string& property, const string& value) const {
}

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/**
 * List the CUDA devices that match a set of optional filters.
 *
 * Two filter keys are recognized: CudaDeviceIndex() and CudaDeviceName().  A
 * device is reported only if it matches every filter that is in effect.  An
 * empty name filter, or an index filter that is empty or cannot be parsed as an
 * integer, is treated as "no filter".
 *
 * @param filters  property name -> required value
 * @return one map per matching device, holding its index and name; empty if
 *         CUDA cannot be initialized or the device count cannot be queried
 * @throws OpenMMException if querying an individual device or its name fails
 */
vector<map<string, string> > CudaPlatform::getDevices(const map<string, string>& filters) const {
    try {
        CudaContext::ensureCudaInitialized();
    }
    catch (...) {
        // CUDA couldn't be initialized, so report no devices.

        return {};
    }

    // Check for properties that might act as filters.

    int deviceIndex = -1;
    map<string, string>::const_iterator indexFilter = filters.find(CudaDeviceIndex());
    if (indexFilter != filters.end()) {
        // Since C++11 a failed extraction stores 0 in its target, which would
        // silently turn an empty or malformed filter into "device 0".  Parse
        // into a temporary so that -1 (no filter) survives a failed parse.
        int parsedIndex;
        if (stringstream(indexFilter->second) >> parsedIndex)
            deviceIndex = parsedIndex;
    }
    map<string, string>::const_iterator nameFilter = filters.find(CudaDeviceName());
    const string deviceName = (nameFilter == filters.end() ? "" : nameFilter->second);

    // Loop over devices.

    vector<map<string, string> > results;
    int numDevices;
    if (cuDeviceGetCount(&numDevices) != CUDA_SUCCESS)
        numDevices = 0;
    const int maxNameLength = 1000;
    for (int i = 0; i < numDevices; i++) {
        if (deviceIndex != -1 && deviceIndex != i)
            continue;
        char name[maxNameLength];
        CUdevice device;
        CHECK_RESULT(cuDeviceGet(&device, i), "Error querying device");
        CHECK_RESULT(cuDeviceGetName(name, maxNameLength, device), "Error querying device name");
        const string currentName(name);
        if (deviceName.size() > 0 && deviceName != currentName)
            continue;
        stringstream deviceIndexStr;
        deviceIndexStr << i;
        map<string, string> properties = {{CudaDeviceIndex(), deviceIndexStr.str()},
                                          {CudaDeviceName(), currentName}};
        results.push_back(properties);
    }
    return results;
}
209
210
211
212
213
214
215
/**
 * Create and attach the PlatformData for a newly created context.
 *
 * Each property is taken from the caller-supplied map when present and from
 * the platform default otherwise.  The blocking-sync, precision, CPU PME,
 * PME-stream and deterministic-forces values are lowercased before use; the
 * device index and temporary directory are passed through unchanged.  CPU PME
 * is forced to "false" when no kernel named CalcPmeReciprocalForceKernel::Name()
 * is supported.
 *
 * The thread count defaults to getNumProcessors() and is overridden by the
 * OPENMM_CPU_THREADS environment variable only when that variable parses as an
 * integer.
 *
 * @param context     the context being created
 * @param properties  property name -> value overrides supplied by the caller
 */
void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
    // Returns the caller-supplied value of a property, or its default if absent.
    auto propertyOrDefault = [&](const string& name) -> string {
        map<string, string>::const_iterator entry = properties.find(name);
        return (entry == properties.end() ? getPropertyDefaultValue(name) : entry->second);
    };
    // Lowercases a value in place.  Characters go through unsigned char because
    // passing a negative char to tolower() is undefined behavior.
    auto lowercase = [](string& text) {
        transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return (char) ::tolower(c); });
    };

    const string devicePropValue = propertyOrDefault(CudaDeviceIndex());
    string blockingPropValue = propertyOrDefault(CudaUseBlockingSync());
    string precisionPropValue = propertyOrDefault(CudaPrecision());
    string cpuPmePropValue = propertyOrDefault(CudaUseCpuPme());
    const string tempPropValue = propertyOrDefault(CudaTempDirectory());
    string pmeStreamPropValue = propertyOrDefault(CudaDisablePmeStream());
    string deterministicForcesValue = propertyOrDefault(CudaDeterministicForces());
    lowercase(blockingPropValue);
    lowercase(precisionPropValue);
    lowercase(cpuPmePropValue);
    lowercase(pmeStreamPropValue);
    lowercase(deterministicForcesValue);

    // CPU PME can only be used if the reciprocal-space PME kernel is supported.
    vector<string> pmeKernelName;
    pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
    if (!supportsKernels(pmeKernelName))
        cpuPmePropValue = "false";

    int threads = getNumProcessors();
    const char* threadsEnv = getenv("OPENMM_CPU_THREADS");
    if (threadsEnv != NULL) {
        // Since C++11 a failed extraction stores 0 in its target, so parse into
        // a temporary and keep the default when the variable is malformed.
        int requestedThreads;
        if (stringstream(threadsEnv) >> requestedThreads)
            threads = requestedThreads;
    }
    context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, tempPropValue,
            pmeStreamPropValue, deterministicForcesValue, threads, NULL));
}

/**
 * Create and attach the PlatformData for a context that is linked to an
 * existing one.
 *
 * Every property value is read back from the original context through its
 * platform's getPropertyValue(), and the thread count is taken from the
 * original context's PlatformData.  The original context is also passed to the
 * new PlatformData.
 *
 * @param context          the linked context being created
 * @param originalContext  the context it is linked to
 */
void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
    Platform& sourcePlatform = originalContext.getPlatform();
    const Context& sourceOwner = originalContext.getOwner();
    PlatformData* sourceData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());

    const string deviceValue = sourcePlatform.getPropertyValue(sourceOwner, CudaDeviceIndex());
    const string blockingValue = sourcePlatform.getPropertyValue(sourceOwner, CudaUseBlockingSync());
    const string precisionValue = sourcePlatform.getPropertyValue(sourceOwner, CudaPrecision());
    const string cpuPmeValue = sourcePlatform.getPropertyValue(sourceOwner, CudaUseCpuPme());
    const string tempValue = sourcePlatform.getPropertyValue(sourceOwner, CudaTempDirectory());
    const string pmeStreamValue = sourcePlatform.getPropertyValue(sourceOwner, CudaDisablePmeStream());
    const string deterministicValue = sourcePlatform.getPropertyValue(sourceOwner, CudaDeterministicForces());
    const int numThreads = sourceData->threads.getNumThreads();

    PlatformData* linkedData = new PlatformData(&context, context.getSystem(), deviceValue, blockingValue, precisionValue,
            cpuPmeValue, tempValue, pmeStreamValue, deterministicValue, numThreads, &originalContext);
    context.setPlatformData(linkedData);
}

/**
 * Delete the PlatformData attached to a context that is being destroyed.
 *
 * @param context  the context being destroyed
 */
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
    delete reinterpret_cast<PlatformData*>(context.getPlatformData());
}

260
/**
 * Create the per-context platform data, including one CudaContext per
 * requested device.
 *
 * @param context                      the ContextImpl this data belongs to
 * @param system                       the System passed to each CudaContext
 * @param deviceIndexProperty          device indices separated by commas and/or
 *                                     spaces; if it names no device, a single
 *                                     CudaContext is created with index -1
 * @param blockingProperty             "true" to request blocking sync
 * @param precisionProperty            precision value, passed to each CudaContext
 * @param cpuPmeProperty               "true" to request CPU PME (only honored
 *                                     when the first context does not use
 *                                     double precision)
 * @param tempProperty                 temporary directory, passed to each CudaContext
 * @param pmeStreamProperty            "true" to disable the PME stream
 * @param deterministicForcesProperty  "true" to request deterministic forces
 * @param numThreads                   thread count used to construct the "threads" member
 * @param originalContext              the context this one is linked to, or NULL
 * @throws OpenMMException if a device name cannot be queried; exceptions thrown
 *         while creating a CudaContext are rethrown after cleanup
 */
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
            const string& cpuPmeProperty, const string& tempProperty, const string& pmeStreamProperty, const string& deterministicForcesProperty,
            int numThreads, ContextImpl* originalContext) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0),
                hasInitializedContexts(false), threads(numThreads) {
    bool blocking = (blockingProperty == "true");

    // Split the device list on commas and spaces.  Adjacent separators (such as
    // ", ") produce empty tokens, which are skipped when contexts are created.

    vector<string> devices;
    size_t searchPos = 0, nextPos;
    while ((nextPos = deviceIndexProperty.find_first_of(", ", searchPos)) != string::npos) {
        devices.push_back(deviceIndexProperty.substr(searchPos, nextPos-searchPos));
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));
    PlatformData* originalData = NULL;
    if (originalContext != NULL)
        originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
    stringstream deviceIndex, deviceName;
    try {
        for (int i = 0; i < (int) devices.size(); i++) {
            if (devices[i].length() > 0) {
                int requestedIndex;
                stringstream(devices[i]) >> requestedIndex;

                // Pair the new context with the original context in the same
                // position.  The position is the number of contexts created so
                // far, not the token index, because empty tokens are skipped.

                CudaContext* linkedContext = (originalData == NULL ? NULL : originalData->contexts[contexts.size()]);
                contexts.push_back(new CudaContext(system, requestedIndex, blocking, precisionProperty, tempProperty, *this, linkedContext));
            }
        }
        if (contexts.size() == 0)
            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, tempProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));

        // Build the comma separated lists of device indices and names actually
        // in use.  This stays inside the try block because CHECK_RESULT can
        // throw, and the destructor does not run for an object whose
        // constructor fails.

        for (int i = 0; i < (int) contexts.size(); i++) {
            if (i > 0) {
                deviceIndex << ',';
                deviceName << ',';
            }
            deviceIndex << contexts[i]->getDeviceIndex();
            char name[1000];
            CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
            deviceName << name;
        }
    }
    catch (...) {
        // If an exception was thrown, do our best to clean up memory.

        for (int i = 0; i < (int) contexts.size(); i++)
            delete contexts[i];
        throw;
    }
    useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
    disablePmeStream = (pmeStreamProperty == "true");
    deterministicForces = (deterministicForcesProperty == "true");
    propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
    propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
    propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
    propertyValues[CudaPlatform::CudaPrecision()] = precisionProperty;
    propertyValues[CudaPlatform::CudaUseCpuPme()] = useCpuPme ? "true" : "false";
    propertyValues[CudaPlatform::CudaCompiler()] = "";
    propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
    propertyValues[CudaPlatform::CudaHostCompiler()] = "";
    propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
    propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
    contextEnergy.resize(contexts.size());

    // Record whether every additional device reports that it can access the
    // first device as a peer.  This only records the capability.  A failed
    // query is treated as "not supported" rather than reading an unset flag.

    peerAccessSupported = true;
    for (int i = 1; i < (int) contexts.size(); i++) {
        int canAccess = 0;
        if (cuDeviceCanAccessPeer(&canAccess, contexts[i]->getDevice(), contexts[0]->getDevice()) != CUDA_SUCCESS || !canAccess) {
            peerAccessSupported = false;
            break;
        }
    }
}

/**
 * Delete every CudaContext held in the contexts list.
 */
CudaPlatform::PlatformData::~PlatformData() {
    for (auto* cu : contexts)
        delete cu;
}

void CudaPlatform::PlatformData::initializeContexts(const System& system) {
338
339
    if (hasInitializedContexts)
        return;
340
341
    for (int i = 0; i < (int) contexts.size(); i++)
        contexts[i]->initialize();
342
    hasInitializedContexts = true;
343
344
345
}

/**
 * Call flush() on the work thread of every CudaContext, in list order.
 */
void CudaPlatform::PlatformData::syncContexts() {
    for (auto* cu : contexts)
        cu->getWorkThread().flush();
}