OpenCLIntegrationUtilities.cpp 7.38 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009-2020 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "OpenCLIntegrationUtilities.h"
28
#include "OpenCLContext.h"
29
30
31
32

using namespace OpenMM;
using namespace std;

33
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : IntegrationUtilities(context, system) {
peastman's avatar
peastman committed
34
        ccmaConvergedHostBuffer.initialize<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);
Peter Eastman's avatar
Peter Eastman committed
35
36
37
        // Different communication mechanisms give optimal performance on AMD and on NVIDIA.
        string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
        ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc.");
38
39
}

40
41
OpenCLArray& OpenCLIntegrationUtilities::getPosDelta() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(posDelta);
42
43
}

44
45
OpenCLArray& OpenCLIntegrationUtilities::getRandom() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(random);
46
47
}

48
49
OpenCLArray& OpenCLIntegrationUtilities::getStepSize() {
    return dynamic_cast<OpenCLContext&>(context).unwrap(stepSize);
50
51
}

52
53
void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
    ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
54
55
56
57
58
59
60
61
62
63
    if (constrainVelocities) {
        settleKernel = settleVelKernel;
        shakeKernel = shakeVelKernel;
        ccmaForceKernel = ccmaVelForceKernel;
    }
    else {
        settleKernel = settlePosKernel;
        shakeKernel = shakePosKernel;
        ccmaForceKernel = ccmaPosForceKernel;
    }
peastman's avatar
peastman committed
64
    if (settleAtoms.isInitialized()) {
65
        if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
66
            settleKernel->setArg(1, tol);
67
        else
68
69
            settleKernel->setArg(1, (float) tol);
        settleKernel->execute(settleAtoms.getSize());
70
    }
peastman's avatar
peastman committed
71
    if (shakeAtoms.isInitialized()) {
72
        if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
73
            shakeKernel->setArg(1, tol);
74
        else
75
76
            shakeKernel->setArg(1, (float) tol);
        shakeKernel->execute(shakeAtoms.getSize());
77
    }
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
    if (ccmaConstraintAtoms.isInitialized()) {
        if (ccmaConstraintAtoms.getSize() <= 1024) {
            // Use the version of CCMA that runs in a single kernel with one workgroup.
            ccmaFullKernel->setArg(0, (int) constrainVelocities);
            if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
                ccmaFullKernel->setArg(14, tol);
            else
                ccmaFullKernel->setArg(14, (float) tol);
            ccmaFullKernel->execute(128, 128);
        }
        else {
            // Use the version of CCMA that uses multiple kernels.
            ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer);
            if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
                ccmaForceKernel->setArg(7, tol);
            else
                ccmaForceKernel->setArg(7, (float) tol);
            ccmaDirectionsKernel->execute(ccmaConstraintAtoms.getSize());
            const int checkInterval = 4;
            OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context);
            cl::CommandQueue queue = cl.getQueue();
            int* converged = (int*) context.getPinnedBuffer();
            int* ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
            ccmaConvergedHostMemory[0] = 0;
            queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
            ccmaUpdateKernel->setArg(4, constrainVelocities ? context.getVelm() : posDelta);
            for (int i = 0; i < 150; i++) {
                ccmaForceKernel->setArg(8, i);
                ccmaForceKernel->execute(ccmaConstraintAtoms.getSize());
                cl::Event event;
                if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
                    queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, NULL, &event);
                ccmaMultiplyKernel->setArg(5, i);
                ccmaMultiplyKernel->execute(ccmaConstraintAtoms.getSize());
                ccmaUpdateKernel->setArg(9, i);
                ccmaUpdateKernel->execute(context.getNumAtoms());
                if ((i+1)%checkInterval == 0) {
                    if (ccmaUseDirectBuffer) {
                        ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
                        queue.flush();
                        while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
                            ;
                        converged[i%2] = ccmaConvergedHostMemory[0];
                        queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
                    }
                    else
                        event.wait();
                    if (converged[i%2])
                        break;
Peter Eastman's avatar
Peter Eastman committed
127
                }
Peter Eastman's avatar
Peter Eastman committed
128
            }
129
130
        }
    }
131
132
}

133
void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
Peter Eastman's avatar
Peter Eastman committed
134
    if (numVsites > 0) {
135
136
137
138
139
140
        vsiteForceKernel->setArg(2, context.getLongForceBuffer());
        vsiteForceKernel->execute(numVsites);
        vsiteSaveForcesKernel->setArg(0, context.getLongForceBuffer());
        vsiteSaveForcesKernel->setArg(1, context.getForceBuffers());
        vsiteSaveForcesKernel->execute(context.getNumAtoms());
   }
141
}