CudaBondedUtilities.cpp 9.98 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2011-2016 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
29
#include "CudaKernelSources.h"
30
31
32
33
34
35
36
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include <iostream>

using namespace OpenMM;
using namespace std;

peastman's avatar
peastman committed
37
CudaBondedUtilities::CudaBondedUtilities(CudaContext& context) : context(context), numForceBuffers(0), maxBonds(0), allGroups(0), hasInitializedKernels(false) {
38
39
40
41
42
43
44
45
46
47
48
49
50
}

CudaBondedUtilities::~CudaBondedUtilities() {
    for (int i = 0; i < (int) atomIndices.size(); i++)
        for (int j = 0; j < (int) atomIndices[i].size(); j++)
            delete atomIndices[i][j];
}

void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
    if (atoms.size() > 0) {
        forceAtoms.push_back(atoms);
        forceSource.push_back(source);
        forceGroup.push_back(group);
peastman's avatar
peastman committed
51
        allGroups |= 1<<group;
52
53
54
    }
}

55
string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
56
57
58
59
60
    arguments.push_back(data);
    argTypes.push_back(type);
    return "customArg"+context.intToString(arguments.size());
}

61
62
63
64
65
66
67
68
69
70
71
72
73
string CudaBondedUtilities::addEnergyParameterDerivative(const string& param) {
    // See if the parameter has already been added.
    
    int index;
    for (index = 0; index < energyParameterDerivatives.size(); index++)
        if (param == energyParameterDerivatives[index])
            break;
    if (index == energyParameterDerivatives.size())
        energyParameterDerivatives.push_back(param);
    context.addEnergyParameterDerivative(param);
    return string("energyParamDeriv")+context.intToString(index);
}

74
void CudaBondedUtilities::addPrefixCode(const string& source) {
75
76
77
    for (int i = 0; i < (int) prefixCode.size(); i++)
        if (prefixCode[i] == source)
            return;
78
79
80
81
82
    prefixCode.push_back(source);
}

void CudaBondedUtilities::initialize(const System& system) {
    int numForces = forceAtoms.size();
83
84
    hasInteractions = (numForces > 0);
    if (!hasInteractions)
85
86
87
88
89
90
91
92
93
94
        return;
    
    // Build the lists of atom indices.
    
    atomIndices.resize(numForces);
    for (int i = 0; i < numForces; i++) {
        int numBonds = forceAtoms[i].size();
        int numAtoms = forceAtoms[i][0].size();
        int startAtom = 0;
        while (startAtom < numAtoms) {
95
            int width = min(numAtoms-startAtom, 4);
96
            int paddedWidth = (width == 3 ? 4 : width);
97
            vector<unsigned int> indexVec(paddedWidth*numBonds);
98
99
            for (int bond = 0; bond < numBonds; bond++) {
                for (int atom = 0; atom < width; atom++)
100
                    indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
101
            }
102
            CudaArray* indices = new CudaArray(context, numBonds, 4*paddedWidth, "bondedIndices");
103
            indices->upload(&indexVec[0]);
104
105
106
107
108
109
110
111
            atomIndices[i].push_back(indices);
            startAtom += width;
        }
    }

    // Create the kernel.

    stringstream s;
112
    s<<CudaKernelSources::vectorOps;
113
114
    for (int i = 0; i < (int) prefixCode.size(); i++)
        s<<prefixCode[i];
115
    s<<"extern \"C\" __global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ";
116
117
118
    for (int force = 0; force < numForces; force++) {
        for (int i = 0; i < (int) atomIndices[force].size(); i++) {
            int indexWidth = atomIndices[force][i]->getElementSize()/4;
119
            string indexType = "uint"+context.intToString(indexWidth);
120
121
122
123
124
            s<<", const "<<indexType<<"* __restrict__ atomIndices"<<force<<"_"<<i;
        }
    }
    for (int i = 0; i < (int) arguments.size(); i++)
        s<<", "<<argTypes[i]<<"* customArg"<<(i+1);
125
126
    if (energyParameterDerivatives.size() > 0)
        s<<", mixed* __restrict__ energyParamDerivs";
127
    s<<") {\n";
128
    s<<"mixed energy = 0;\n";
129
130
    for (int i = 0; i < energyParameterDerivatives.size(); i++)
        s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
131
132
133
    for (int force = 0; force < numForces; force++)
        s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
    s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n";
134
135
136
137
138
139
    const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
    int numDerivs = allParamDerivNames.size();
    for (int i = 0; i < energyParameterDerivatives.size(); i++)
        for (int index = 0; index < numDerivs; index++)
            if (allParamDerivNames[index] == energyParameterDerivatives[i])
                s<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
140
141
142
143
144
145
146
147
148
149
150
    s<<"}\n";
    map<string, string> defines;
    defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
    CUmodule module = context.createModule(s.str(), defines);
    kernel = context.getKernel(module, "computeBondedForces");
    forceAtoms.clear();
    forceSource.clear();
}

string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const string& computeForce) {
    maxBonds = max(maxBonds, numBonds);
151
    string suffix[] = {".x", ".y", ".z", ".w"};
152
153
154
155
156
157
    stringstream s;
    s<<"if ((groups&"<<(1<<group)<<") != 0)\n";
    s<<"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "<<numBonds<<"; index += blockDim.x*gridDim.x) {\n";
    int startAtom = 0;
    for (int i = 0; i < (int) atomIndices[forceIndex].size(); i++) {
        int indexWidth = atomIndices[forceIndex][i]->getElementSize()/4;
158
        string indexType = "uint"+context.intToString(indexWidth);
159
        s<<"    "<<indexType<<" atoms"<<i<<" = atomIndices"<<forceIndex<<"_"<<i<<"[index];\n";
160
161
        int atomsToLoad = min(indexWidth, numAtoms-startAtom);
        for (int j = 0; j < atomsToLoad; j++) {
162
            s<<"    unsigned int atom"<<(startAtom+j+1)<<" = atoms"<<i<<suffix[j]<<";\n";
163
            s<<"    real4 pos"<<(startAtom+j+1)<<" = posq[atom"<<(startAtom+j+1)<<"];\n";
164
165
166
167
168
        }
        startAtom += indexWidth;
    }
    s<<computeForce<<"\n";
    for (int i = 0; i < numAtoms; i++) {
169
170
171
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0x100000000)));\n";
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0x100000000)));\n";
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0x100000000)));\n";
172
        s<<"    __threadfence_block();\n";
173
174
175
176
177
178
    }
    s<<"}\n";
    return s.str();
}

void CudaBondedUtilities::computeInteractions(int groups) {
peastman's avatar
peastman committed
179
180
    if ((groups&allGroups) == 0)
        return;
181
182
183
184
185
186
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        kernelArgs.push_back(&context.getForce().getDevicePointer());
        kernelArgs.push_back(&context.getEnergyBuffer().getDevicePointer());
        kernelArgs.push_back(&context.getPosq().getDevicePointer());
        kernelArgs.push_back(NULL);
187
188
189
190
191
        kernelArgs.push_back(context.getPeriodicBoxSizePointer());
        kernelArgs.push_back(context.getInvPeriodicBoxSizePointer());
        kernelArgs.push_back(context.getPeriodicBoxVecXPointer());
        kernelArgs.push_back(context.getPeriodicBoxVecYPointer());
        kernelArgs.push_back(context.getPeriodicBoxVecZPointer());
192
193
194
195
196
        for (int i = 0; i < (int) atomIndices.size(); i++)
            for (int j = 0; j < (int) atomIndices[i].size(); j++)
                kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer());
        for (int i = 0; i < (int) arguments.size(); i++)
            kernelArgs.push_back(&arguments[i]);
197
198
        if (energyParameterDerivatives.size() > 0)
            kernelArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
199
    }
200
201
    if (!hasInteractions)
        return;
202
203
    kernelArgs[3] = &groups;
    context.executeKernel(kernel, &kernelArgs[0], maxBonds);
204
}