"vscode:/vscode.git/clone" did not exist on "a700032db0f70bb67f7c32e52045c0018cf48216"
CpuGBSAOBCForce.cpp 17.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject
 * to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/vectorize.h"
28
#include "gmx_atomic.h"
29
#include <algorithm>
30
#include <cmath>
31
#include <cstdlib>
32
33
34
35

using namespace std;
using namespace OpenMM;

36
37
38
const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
39

40
41
42
43
44
45
46
47
48
49
50
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
    ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeForce(threads, threadIndex);
    }
    CpuGBSAOBCForce& owner;
};

CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
51
    logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
52
    logDXInv = 1.0f/logDX;
peastman's avatar
peastman committed
53
54
    logTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
55
        double x = TABLE_MIN+i*logDX;
56
        logTable[i] = log(x);
57
    }
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
}

void CpuGBSAOBCForce::setUseCutoff(float distance) {
    cutoff = true;
    cutoffDistance = distance;
}

void CpuGBSAOBCForce::setPeriodic(float* periodicBoxSize) {
    periodic = true;
    this->periodicBoxSize[0] = periodicBoxSize[0];
    this->periodicBoxSize[1] = periodicBoxSize[1];
    this->periodicBoxSize[2] = periodicBoxSize[2];
}

void CpuGBSAOBCForce::setSoluteDielectric(float dielectric) {
    soluteDielectric = dielectric;
}

void CpuGBSAOBCForce::setSolventDielectric(float dielectric) {
    solventDielectric = dielectric;
}

80
81
82
83
void CpuGBSAOBCForce::setSurfaceAreaEnergy(float energy) {
    surfaceAreaFactor = 4*M_PI*energy;
}

84
85
86
87
88
89
const std::vector<std::pair<float, float> >& CpuGBSAOBCForce::getParticleParameters() const {
    return particleParams;
}

void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, float> >& params) {
    particleParams = params;
90
91
    bornRadii.resize(params.size()+3);
    obcChain.resize(params.size()+3);
92
93
}

94
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
95
96
97
98
99
100
101
102
103
    // Record the parameters for the threads.
    
    this->posq = &posq[0];
    this->threadForce = &threadForce;
    includeEnergy = (totalEnergy != NULL);
    int numThreads = threads.getNumThreads();
    threadEnergy.resize(numThreads);
    threadBornForces.resize(numThreads);
    for (int i = 0; i < numThreads; i++)
104
        threadBornForces[i].resize(particleParams.size()+3);
105
106
    gmx_atomic_t counter;
    this->atomicCounter = &counter;
107
108
109
110
    
    // Signal the threads to start running and wait for them to finish.
    
    ComputeTask task(*this);
111
    gmx_atomic_set(&counter, 0);
112
113
    threads.execute(task);
    threads.waitForThreads(); // Compute Born radii
114
    gmx_atomic_set(&counter, 0);
115
116
    threads.resumeThreads();
    threads.waitForThreads(); // Compute surface area term
117
    gmx_atomic_set(&counter, 0);
118
119
    threads.resumeThreads();
    threads.waitForThreads(); // First loop
120
    gmx_atomic_set(&counter, 0);
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
    threads.resumeThreads();
    threads.waitForThreads(); // Second loop
    
    // Combine the energies from all the threads.
    
    if (totalEnergy != NULL) {
        double energy = 0;
        for (int i = 0; i < numThreads; i++)
            energy += threadEnergy[i];
        *totalEnergy += (float) energy;
    }
}

void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
    int numParticles = particleParams.size();
    int numThreads = threads.getNumThreads();
    const float dielectricOffset = 0.009;
    const float alphaObc = 1.0f;
    const float betaObc = 0.8f;
    const float gammaObc = 4.85f;
    fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
    fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
    int start = (threadIndex*numParticles)/numThreads;
    int end = ((threadIndex+1)*numParticles)/numThreads;

    // Calculate Born radii

148
149
150
151
152
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomRadius[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomRadius[i] = particleParams[atomIndex].first;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            blockMask[i] = 0xFFFFFFFF;
        }
        fvec4 offsetRadiusI(atomRadius);
        fvec4 radiusIInverse = 1.0f/offsetRadiusI;
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        ivec4 mask(blockMask);
        float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
171
        for (int atomJ = 0; atomJ < numParticles; atomJ++) {
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
            fvec4 posJ(posq+4*atomJ);
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
                continue;
            fvec4 r = sqrt(r2);
            float scaledRadiusJ = particleParams[atomJ].second;
            float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
            fvec4 rScaledRadiusJ = r + scaledRadiusJ;
            include = include & (offsetRadiusI < rScaledRadiusJ);
            fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
            fvec4 u_ij = 1.0f/rScaledRadiusJ;
            fvec4 l_ij2 = l_ij*l_ij;
            fvec4 u_ij2 = u_ij*u_ij;
            fvec4 rInverse = 1.0f/r;
            fvec4 r2Inverse = rInverse*rInverse;
191
            fvec4 logRatio = fastLog(u_ij/l_ij);
192
193
194
195
196
197
            fvec4 term = l_ij - u_ij + 0.25f*r*(u_ij2 - l_ij2) + (0.5f*rInverse*logRatio) + (0.25f*scaledRadiusJ*scaledRadiusJ*rInverse)*(l_ij2 - u_ij2);
            for (int j = 0; j < 4; j++) {
                if (include[j]) {
                    sum[j] += term[j];
                    if (offsetRadiusI[j] < scaledRadiusJ-r[j])
                        sum[j] += 2.0f*(radiusIInverse[j]-l_ij[j]);
198
199
200
                }
            }
        }
201
202
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
203
            sum[i] *= 0.5f*atomRadius[i];
204
205
206
            float sum2 = sum[i]*sum[i];
            float sum3 = sum[i]*sum2;
            float tanhSum = tanh(alphaObc*sum[i] - betaObc*sum2 + gammaObc*sum3);
peastman's avatar
peastman committed
207
208
209
            float radiusI = atomRadius[i] + dielectricOffset;
            bornRadii[atomIndex] = 1.0f/(1.0f/atomRadius[i] - tanhSum/radiusI);
            obcChain[atomIndex] = atomRadius[i]*(alphaObc - 2.0f*betaObc*sum[i] + 3.0f*gammaObc*sum2);
210
211
            obcChain[atomIndex] = (1.0f - tanhSum*tanhSum)*obcChain[atomIndex]/radiusI;
        }
212
213
214
215
216
217
218
    }
    threads.syncThreads();

    // Calculate ACE surface area term.

    const float probeRadius = 0.14f;
    double energy = 0.0;
peastman's avatar
peastman committed
219
    AlignedArray<float>& bornForces = threadBornForces[threadIndex];
220
221
    for (int i = 0; i < numParticles; i++)
        bornForces[i] = 0.0f;
222
223
224
225
    while (true) {
        int atomI = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
        if (atomI >= numParticles)
            break;
226
        if (bornRadii[atomI] > 0) {
227
228
229
            float radiusI = particleParams[atomI].first + dielectricOffset;
            float r = radiusI + probeRadius;
            float ratio6 = powf(radiusI/bornRadii[atomI], 6.0f);
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
            float saTerm = surfaceAreaFactor*r*r*ratio6;
            energy += saTerm;
            bornForces[atomI] = -6.0f*saTerm/bornRadii[atomI]; 
        }
        else
            bornForces[atomI] = 0.0f;
    }
    threads.syncThreads();
 
    // First loop of Born energy computation.

    float* forces = &(*threadForce)[threadIndex][0];
    float preFactor;
    if (soluteDielectric != 0.0f && solventDielectric != 0.0f)
        preFactor = ONE_4PI_EPS0*((1.0f/solventDielectric) - (1.0f/soluteDielectric));
    else
        preFactor = 0.0f;
247
248
249
250
251
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
        int numInBlock = min(4, numParticles-blockStart);
252
253
254
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomCharge[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
peastman's avatar
peastman committed
255
        fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f), blockAtomBornForce(0.0f);
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            atomCharge[i] = preFactor*posq[4*atomIndex+3];
            blockMask[i] = 0xFFFFFFFF;
        }
        fvec4 radii(&bornRadii[blockStart]);
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        fvec4 partialChargeI(atomCharge);
        ivec4 mask(blockMask);
        for (int atomJ = blockStart; atomJ < numParticles; atomJ++) {
271
            fvec4 posJ(posq+4*atomJ);
272
273
274
275
276
277
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex <= ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
278
                continue;
279
280
281
            fvec4 r = sqrt(r2);
            fvec4 alpha2_ij = radii*bornRadii[atomJ];
            fvec4 D_ij = r2/(4.0f*alpha2_ij);
peastman's avatar
peastman committed
282
            fvec4 expTerm(expf(-D_ij[0]), expf(-D_ij[1]), expf(-D_ij[2]), expf(-D_ij[3]));
peastman's avatar
peastman committed
283
            fvec4 denominator2 = r2 + alpha2_ij*expTerm;
284
285
286
287
            fvec4 denominator = sqrt(denominator2);
            fvec4 Gpol = (partialChargeI*posJ[3])/denominator; 
            fvec4 dGpol_dr = -Gpol*(1.0f - 0.25f*expTerm)/denominator2;  
            fvec4 dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f + D_ij)/denominator2;
peastman's avatar
peastman committed
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
            dGpol_dr = blend(0.0f, dGpol_dr, include);
            dGpol_dalpha2_ij = blend(0.0f, dGpol_dalpha2_ij, include);
            fvec4 fx = dx*dGpol_dr;
            fvec4 fy = dy*dGpol_dr;
            fvec4 fz = dz*dGpol_dr;
            blockAtomForceX -= fx;
            blockAtomForceY -= fy;
            blockAtomForceZ -= fz;
            blockAtomBornForce += dGpol_dalpha2_ij*bornRadii[atomJ];
            float* atomForce = forces+4*atomJ;
            fvec4 one(1.0f);
            atomForce[0] += dot4(fx, one);
            atomForce[1] += dot4(fy, one);
            atomForce[2] += dot4(fz, one);
            ivec4 atomJMask = include & (blockAtomIndex != ivec4(atomJ));
            fvec4 termEnergy = blend(0.0f, Gpol, include);
            if (cutoff)
                termEnergy -= blend(0.0f, partialChargeI*posJ[3]/cutoffDistance, atomJMask);
            termEnergy *= blend(0.5f, 1.0f, atomJMask);
            energy += dot4(termEnergy, one);
            bornForces[atomJ] += dot4(blend(0.0f, dGpol_dalpha2_ij, atomJMask), radii);
309
        }
peastman's avatar
peastman committed
310
311
        fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
        transpose(f[0], f[1], f[2], f[3]);
312
313
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
314
315
            (fvec4(forces+4*atomIndex)+f[i]).store(forces+4*atomIndex);
            bornForces[atomIndex] += blockAtomBornForce[i];
316
        }
317
318
319
320
321
    }
    threads.syncThreads();

    // Second loop of Born energy computation.

322
323
324
325
    while (true) {
        int blockStart = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 4);
        if (blockStart >= numParticles)
            break;
326
        fvec4 bornForce(0.0f);
327
        for (int i = 0; i < numThreads; i++)
328
329
330
            bornForce += fvec4(&threadBornForces[i][blockStart]);
        fvec4 radii(&bornRadii[blockStart]);
        bornForce *= radii*radii*fvec4(&obcChain[blockStart]);
331
        int numInBlock = min(4, numParticles-blockStart);
332
333
334
        ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
        float atomRadius[4], atomx[4], atomy[4], atomz[4];
        int blockMask[4] = {0, 0, 0, 0};
peastman's avatar
peastman committed
335
        fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
336
337
338
339
340
341
342
343
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
            atomRadius[i] = particleParams[atomIndex].first;
            atomx[i] = posq[4*atomIndex];
            atomy[i] = posq[4*atomIndex+1];
            atomz[i] = posq[4*atomIndex+2];
            blockMask[i] = 0xFFFFFFFF;
        }
peastman's avatar
peastman committed
344
345
346
347
348
        for (int i = numInBlock; i < 4; i++) {
            atomx[i] = 0.0f;
            atomy[i] = 0.0f;
            atomz[i] = 0.0f;
        }
349
350
351
352
353
        fvec4 offsetRadiusI(atomRadius);
        fvec4 x(atomx);
        fvec4 y(atomy);
        fvec4 z(atomz);
        ivec4 mask(blockMask);
354
        for (int atomJ = 0; atomJ < numParticles; atomJ++) {
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
            fvec4 posJ(posq+4*atomJ);
            fvec4 dx, dy, dz, r2;
            getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
            ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
            if (cutoff)
                include = include & (r2 < cutoffDistance*cutoffDistance);
            if (!any(include))
                continue;
            fvec4 r = sqrt(r2);
            float scaledRadiusJ = particleParams[atomJ].second;
            float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
            fvec4 rScaledRadiusJ = r + scaledRadiusJ;
            include = include & (offsetRadiusI < rScaledRadiusJ);
            fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
            fvec4 u_ij = 1.0f/rScaledRadiusJ;
            fvec4 l_ij2 = l_ij*l_ij;
            fvec4 u_ij2 = u_ij*u_ij;
            fvec4 rInverse = 1.0f/r;
            fvec4 r2Inverse = rInverse*rInverse;
374
            fvec4 logRatio = fastLog(u_ij/l_ij);
375
376
            fvec4 t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
            fvec4 de = bornForce*t3*rInverse;
peastman's avatar
peastman committed
377
            de = blend(0.0f, de, include);
peastman's avatar
peastman committed
378
379
380
381
382
383
384
385
386
387
388
            fvec4 fx = dx*de;
            fvec4 fy = dy*de;
            fvec4 fz = dz*de;
            blockAtomForceX += fx;
            blockAtomForceY += fy;
            blockAtomForceZ += fz;
            float* atomForce = forces+4*atomJ;
            fvec4 one(1.0f);
            atomForce[0] -= dot4(fx, one);
            atomForce[1] -= dot4(fy, one);
            atomForce[2] -= dot4(fz, one);
389
        }
peastman's avatar
peastman committed
390
391
        fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
        transpose(f[0], f[1], f[2], f[3]);
392
393
        for (int i = 0; i < numInBlock; i++) {
            int atomIndex = blockStart+i;
peastman's avatar
peastman committed
394
            (fvec4(forces+4*atomIndex)+f[i]).store(forces+4*atomIndex);
395
396
397
398
399
        }
    }
    threadEnergy[threadIndex] = energy;
}

400
401
402
403
void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
    dx = x-posI[0];
    dy = y-posI[1];
    dz = z-posI[2];
404
    if (periodic) {
405
406
407
        dx -= round(dx*invBoxSize[0])*boxSize[0];
        dy -= round(dy*invBoxSize[1])*boxSize[1];
        dz -= round(dz*invBoxSize[2])*boxSize[2];
408
    }
409
    r2 = dx*dx + dy*dy + dz*dz;
410
}
411

412
fvec4 CpuGBSAOBCForce::fastLog(const fvec4& x) {
413
414
    // Evaluate log(x) using a lookup table for speed.

415
    fvec4 x1 = (x-TABLE_MIN)*logDXInv;
416
    ivec4 index = floor(x1);
peastman's avatar
peastman committed
417
418
    if (any((index < 0) | (index >= NUM_TABLE_POINTS)))
        return fvec4(logf(x[0]), logf(x[1]), logf(x[2]), logf(x[3]));
419
420
    fvec4 coeff2 = x1-index;
    fvec4 coeff1 = 1.0f-coeff2;
peastman's avatar
peastman committed
421
422
423
424
425
426
    fvec4 t1(&logTable[index[0]]);
    fvec4 t2(&logTable[index[1]]);
    fvec4 t3(&logTable[index[2]]);
    fvec4 t4(&logTable[index[3]]);
    transpose(t1, t2, t3, t4);
    return coeff1*t1 + coeff2*t2;
427
}