CpuGBSAOBCForce.cpp 16.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject
 * to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/vectorize.h"
28
#include "gmx_atomic.h"
29
#include <algorithm>
30
#include <cmath>
31
#include <cstdlib>
32
33
34
35

using namespace std;
using namespace OpenMM;

36
37
38
const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
39

40
41
42
43
44
45
46
47
48
49
50
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
    ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeForce(threads, threadIndex);
    }
    CpuGBSAOBCForce& owner;
};

CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
51
    logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
52
    logDXInv = 1.0f/logDX;
peastman's avatar
peastman committed
53
54
    logTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
55
        double x = TABLE_MIN+i*logDX;
56
        logTable[i] = log(x);
57
    }
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
}

void CpuGBSAOBCForce::setUseCutoff(float distance) {
    cutoff = true;
    cutoffDistance = distance;
}

void CpuGBSAOBCForce::setPeriodic(float* periodicBoxSize) {
    periodic = true;
    this->periodicBoxSize[0] = periodicBoxSize[0];
    this->periodicBoxSize[1] = periodicBoxSize[1];
    this->periodicBoxSize[2] = periodicBoxSize[2];
}

void CpuGBSAOBCForce::setSoluteDielectric(float dielectric) {
    soluteDielectric = dielectric;
}

void CpuGBSAOBCForce::setSolventDielectric(float dielectric) {
    solventDielectric = dielectric;
}

const std::vector<std::pair<float, float> >& CpuGBSAOBCForce::getParticleParameters() const {
    return particleParams;
}

void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, float> >& params) {
    particleParams = params;
86
87
    bornRadii.resize(params.size()+3);
    obcChain.resize(params.size()+3);
88
89
}

90
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
91
92
93
94
95
96
97
98
99
    // Record the parameters for the threads.
    
    this->posq = &posq[0];
    this->threadForce = &threadForce;
    includeEnergy = (totalEnergy != NULL);
    int numThreads = threads.getNumThreads();
    threadEnergy.resize(numThreads);
    threadBornForces.resize(numThreads);
    for (int i = 0; i < numThreads; i++)
100
        threadBornForces[i].resize(particleParams.size()+3);
101
102
    gmx_atomic_t counter;
    this->atomicCounter = &counter;
103
104
105
106
    
    // Signal the threads to start running and wait for them to finish.
    
    ComputeTask task(*this);
107
    gmx_atomic_set(&counter, 0);
108
109
    threads.execute(task);
    threads.waitForThreads(); // Compute Born radii
110
    gmx_atomic_set(&counter, 0);
111
112
    threads.resumeThreads();
    threads.waitForThreads(); // Compute surface area term
113
    gmx_atomic_set(&counter, 0);
114
115
    threads.resumeThreads();
    threads.waitForThreads(); // First loop
116
    gmx_atomic_set(&counter, 0);
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
    threads.resumeThreads();
    threads.waitForThreads(); // Second loop
    
    // Combine the energies from all the threads.
    
    if (totalEnergy != NULL) {
        double energy = 0;
        for (int i = 0; i < numThreads; i++)
            energy += threadEnergy[i];
        *totalEnergy += (float) energy;
    }
}

void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    int numParticles = particleParams.size();
    int numThreads = threads.getNumThreads();
    const float dielectricOffset = 0.009;
    const float alphaObc = 1.0f;
    const float betaObc = 0.8f;
    const float gammaObc = 4.85f;
    fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
    fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
    int start = (threadIndex*numParticles)/numThreads;
    int end = ((threadIndex+1)*numParticles)/numThreads;

    // Calculate Born radii

144
145
146
147
148
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomRadius[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomRadius[i] = particleParams[atomIndex].first;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            blockMask[i] = 0xFFFFFFFF;
        }
        fvec4 offsetRadiusI(atomRadius);
        fvec4 radiusIInverse = 1.0f/offsetRadiusI;
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        ivec4 mask(blockMask);
        float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
167
        for (int atomJ = 0; atomJ < numParticles; atomJ++) {
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
            fvec4 posJ(posq+4*atomJ);
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
                continue;
            fvec4 r = sqrt(r2);
            float scaledRadiusJ = particleParams[atomJ].second;
            float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
            fvec4 rScaledRadiusJ = r + scaledRadiusJ;
            include = include & (offsetRadiusI < rScaledRadiusJ);
            fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
            fvec4 u_ij = 1.0f/rScaledRadiusJ;
            fvec4 l_ij2 = l_ij*l_ij;
            fvec4 u_ij2 = u_ij*u_ij;
            fvec4 rInverse = 1.0f/r;
            fvec4 r2Inverse = rInverse*rInverse;
187
            fvec4 logRatio = fastLog(u_ij/l_ij);
188
189
190
191
192
193
            fvec4 term = l_ij - u_ij + 0.25f*r*(u_ij2 - l_ij2) + (0.5f*rInverse*logRatio) + (0.25f*scaledRadiusJ*scaledRadiusJ*rInverse)*(l_ij2 - u_ij2);
            for (int j = 0; j < 4; j++) {
                if (include[j]) {
                    sum[j] += term[j];
                    if (offsetRadiusI[j] < scaledRadiusJ-r[j])
                        sum[j] += 2.0f*(radiusIInverse[j]-l_ij[j]);
194
195
196
                }
            }
        }
197
198
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
199
            sum[i] *= 0.5f*atomRadius[i];
200
201
202
            float sum2 = sum[i]*sum[i];
            float sum3 = sum[i]*sum2;
            float tanhSum = tanh(alphaObc*sum[i] - betaObc*sum2 + gammaObc*sum3);
peastman's avatar
peastman committed
203
204
205
            float radiusI = atomRadius[i] + dielectricOffset;
            bornRadii[atomIndex] = 1.0f/(1.0f/atomRadius[i] - tanhSum/radiusI);
            obcChain[atomIndex] = atomRadius[i]*(alphaObc - 2.0f*betaObc*sum[i] + 3.0f*gammaObc*sum2);
206
207
            obcChain[atomIndex] = (1.0f - tanhSum*tanhSum)*obcChain[atomIndex]/radiusI;
        }
208
209
210
211
212
213
214
215
216
217
218
    }
    threads.syncThreads();

    // Calculate ACE surface area term.

    const float probeRadius = 0.14f;
    const float surfaceAreaFactor = 28.3919551;
    double energy = 0.0;
    vector<float>& bornForces = threadBornForces[threadIndex];
    for (int i = 0; i < numParticles; i++)
        bornForces[i] = 0.0f;
219
220
221
222
    while (true) {
        int atomI = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
        if (atomI >= numParticles)
            break;
223
        if (bornRadii[atomI] > 0) {
224
225
226
            float radiusI = particleParams[atomI].first + dielectricOffset;
            float r = radiusI + probeRadius;
            float ratio6 = powf(radiusI/bornRadii[atomI], 6.0f);
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
            float saTerm = surfaceAreaFactor*r*r*ratio6;
            energy += saTerm;
            bornForces[atomI] = -6.0f*saTerm/bornRadii[atomI]; 
        }
        else
            bornForces[atomI] = 0.0f;
    }
    threads.syncThreads();
 
    // First loop of Born energy computation.

    float* forces = &(*threadForce)[threadIndex][0];
    float preFactor;
    if (soluteDielectric != 0.0f && solventDielectric != 0.0f)
        preFactor = ONE_4PI_EPS0*((1.0f/solventDielectric) - (1.0f/soluteDielectric));
    else
        preFactor = 0.0f;
244
245
246
247
248
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
249
250
251
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomCharge[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
peastman's avatar
peastman committed
252
        fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f), blockAtomBornForce(0.0f);
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            atomCharge[i] = preFactor*posq[4*atomIndex+3];
            blockMask[i] = 0xFFFFFFFF;
        }
        fvec4 radii(&bornRadii[blockStart]);
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        fvec4 partialChargeI(atomCharge);
        ivec4 mask(blockMask);
        for (int atomJ = blockStart; atomJ < numParticles; atomJ++) {
268
            fvec4 posJ(posq+4*atomJ);
269
270
271
272
273
274
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex <= ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
275
                continue;
276
277
278
            fvec4 r = sqrt(r2);
            fvec4 alpha2_ij = radii*bornRadii[atomJ];
            fvec4 D_ij = r2/(4.0f*alpha2_ij);
peastman's avatar
peastman committed
279
            fvec4 expTerm(expf(-D_ij[0]), expf(-D_ij[1]), expf(-D_ij[2]), expf(-D_ij[3]));
280
281
282
283
284
            fvec4 denominator2 = r2 + alpha2_ij*expTerm; 
            fvec4 denominator = sqrt(denominator2);
            fvec4 Gpol = (partialChargeI*posJ[3])/denominator; 
            fvec4 dGpol_dr = -Gpol*(1.0f - 0.25f*expTerm)/denominator2;  
            fvec4 dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f + D_ij)/denominator2;
peastman's avatar
peastman committed
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
            dGpol_dr = blend(0.0f, dGpol_dr, include);
            dGpol_dalpha2_ij = blend(0.0f, dGpol_dalpha2_ij, include);
            fvec4 fx = dx*dGpol_dr;
            fvec4 fy = dy*dGpol_dr;
            fvec4 fz = dz*dGpol_dr;
            blockAtomForceX -= fx;
            blockAtomForceY -= fy;
            blockAtomForceZ -= fz;
            blockAtomBornForce += dGpol_dalpha2_ij*bornRadii[atomJ];
            float* atomForce = forces+4*atomJ;
            fvec4 one(1.0f);
            atomForce[0] += dot4(fx, one);
            atomForce[1] += dot4(fy, one);
            atomForce[2] += dot4(fz, one);
            ivec4 atomJMask = include & (blockAtomIndex != ivec4(atomJ));
            fvec4 termEnergy = blend(0.0f, Gpol, include);
            if (cutoff)
                termEnergy -= blend(0.0f, partialChargeI*posJ[3]/cutoffDistance, atomJMask);
            termEnergy *= blend(0.5f, 1.0f, atomJMask);
            energy += dot4(termEnergy, one);
            bornForces[atomJ] += dot4(blend(0.0f, dGpol_dalpha2_ij, atomJMask), radii);
306
        }
peastman's avatar
peastman committed
307
308
        fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
        transpose(f[0], f[1], f[2], f[3]);
309
310
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
311
312
            (fvec4(forces+4*atomIndex)+f[i]).store(forces+4*atomIndex);
            bornForces[atomIndex] += blockAtomBornForce[i];
313
        }
314
315
316
317
318
    }
    threads.syncThreads();

    // Second loop of Born energy computation.

319
320
321
322
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
323
        fvec4 bornForce(0.0f);
324
        for (int i = 0; i < numThreads; i++)
325
326
327
            bornForce += fvec4(&threadBornForces[i][blockStart]);
        fvec4 radii(&bornRadii[blockStart]);
        bornForce *= radii*radii*fvec4(&obcChain[blockStart]);
328
        int numInBlock = min(4, numParticles-blockStart);
329
330
331
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomRadius[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
peastman's avatar
peastman committed
332
        fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
333
334
335
336
337
338
339
340
341
342
343
344
345
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomRadius[i] = particleParams[atomIndex].first;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            blockMask[i] = 0xFFFFFFFF;
        }
        fvec4 offsetRadiusI(atomRadius);
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        ivec4 mask(blockMask);
346
        for (int atomJ = 0; atomJ < numParticles; atomJ++) {
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
            fvec4 posJ(posq+4*atomJ);
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
                continue;
            fvec4 r = sqrt(r2);
            float scaledRadiusJ = particleParams[atomJ].second;
            float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
            fvec4 rScaledRadiusJ = r + scaledRadiusJ;
            include = include & (offsetRadiusI < rScaledRadiusJ);
            fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
            fvec4 u_ij = 1.0f/rScaledRadiusJ;
            fvec4 l_ij2 = l_ij*l_ij;
            fvec4 u_ij2 = u_ij*u_ij;
            fvec4 rInverse = 1.0f/r;
            fvec4 r2Inverse = rInverse*rInverse;
366
            fvec4 logRatio = fastLog(u_ij/l_ij);
367
368
            fvec4 t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
            fvec4 de = bornForce*t3*rInverse;
peastman's avatar
peastman committed
369
            de = blend(0.0f, de, include);
peastman's avatar
peastman committed
370
371
372
373
374
375
376
377
378
379
380
            fvec4 fx = dx*de;
            fvec4 fy = dy*de;
            fvec4 fz = dz*de;
            blockAtomForceX += fx;
            blockAtomForceY += fy;
            blockAtomForceZ += fz;
            float* atomForce = forces+4*atomJ;
            fvec4 one(1.0f);
            atomForce[0] -= dot4(fx, one);
            atomForce[1] -= dot4(fy, one);
            atomForce[2] -= dot4(fz, one);
381
        }
peastman's avatar
peastman committed
382
383
        fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
        transpose(f[0], f[1], f[2], f[3]);
384
385
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
386
            (fvec4(forces+4*atomIndex)+f[i]).store(forces+4*atomIndex);
387
388
389
390
391
        }
    }
    threadEnergy[threadIndex] = energy;
}

392
393
394
395
void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
    dx = x-posI[0];
    dy = y-posI[1];
    dz = z-posI[2];
396
    if (periodic) {
397
398
399
        dx -= round(dx*invBoxSize[0])*boxSize[0];
        dy -= round(dy*invBoxSize[1])*boxSize[1];
        dz -= round(dz*invBoxSize[2])*boxSize[2];
400
    }
401
    r2 = dx*dx + dy*dy + dz*dz;
402
}
403

404
fvec4 CpuGBSAOBCForce::fastLog(const fvec4& x) {
405
406
    // Evaluate log(x) using a lookup table for speed.

peastman's avatar
peastman committed
407
    if (any((x < TABLE_MIN) | (x >= TABLE_MAX)))
408
409
        return fvec4(logf(x[0]), logf(x[1]), logf(x[2]), logf(x[3]));
    fvec4 x1 = (x-TABLE_MIN)*logDXInv;
410
    ivec4 index = floor(x1);
411
412
    fvec4 coeff2 = x1-index;
    fvec4 coeff1 = 1.0f-coeff2;
peastman's avatar
peastman committed
413
414
415
416
417
418
    fvec4 t1(&logTable[index[0]]);
    fvec4 t2(&logTable[index[1]]);
    fvec4 t3(&logTable[index[2]]);
    fvec4 t4(&logTable[index[3]]);
    transpose(t1, t2, t3, t4);
    return coeff1*t1 + coeff2*t2;
419
}