CpuNonbondedForce.cpp 23.7 KB
Newer Older
1

peastman's avatar
peastman committed
2
/* Portions copyright (c) 2006-2018 Stanford University and Simbios.
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject
 * to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <complex>

#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
31
#include <algorithm>
32
#include <iostream>
33
34
35
36
37
38

// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
#include "openmm/internal/MSVC_erfc.h"

using namespace std;
peastman's avatar
peastman committed
39
using namespace OpenMM;
40

peastman's avatar
peastman committed
41
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
42
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
43

44
45
46
47
48
49
/**---------------------------------------------------------------------------------------

   CpuNonbondedForce constructor

   --------------------------------------------------------------------------------------- */

50
51
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
    cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
52
53
}

54
55
56
CpuNonbondedForce::~CpuNonbondedForce() {
}

57
/**---------------------------------------------------------------------------------------
58

59
   Set the force to use a cutoff.
60

61
62
63
   @param distance            the cutoff distance
   @param neighbors           the neighbor list to use
   @param solventDielectric   the dielectric constant of the bulk solvent
64
65
66

     --------------------------------------------------------------------------------------- */

67
68
69
void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric) {
    if (distance != cutoffDistance)
        tableIsValid = false;
70
71
    cutoff = true;
    cutoffDistance = distance;
72
    inverseRcut6 = pow(cutoffDistance, -6);
73
74
75
    neighborList = &neighbors;
    krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
    crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
76
77
78
79
80
81
82
83
84
85
86
    if(alphaDispersionEwald != 0.0f){
        // We set this here, in case setUseCutoff is called after the dispersion alpha is set.
        double dalphaR = alphaDispersionEwald*cutoffDistance;
        double dar2 = dalphaR * dalphaR;
        double dar4 = dar2*dar2;
        double dar6 = dar4*dar2;
        double expterm = EXP(-dar2);
        inverseRcut6Expterm  = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
    }

}
87
88
89
90
91
92
93
94
95
96
97
98
99
100

/**---------------------------------------------------------------------------------------

   Set the force to use a switching function on the Lennard-Jones interaction.

   @param distance            the switching distance

   --------------------------------------------------------------------------------------- */

void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
    useSwitch = true;
    switchingDistance = distance;
}

101
/**---------------------------------------------------------------------------------------
102
103
104
105
106

     Set the force to use periodic boundary conditions.  This requires that a cutoff has
     also been set, and the smallest side of the periodic box is at least twice the cutoff
     distance.

107
     @param periodicBoxVectors    the vectors defining the periodic box
108
109
110

     --------------------------------------------------------------------------------------- */

peastman's avatar
peastman committed
111
void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
112
113

    assert(cutoff);
114
115
116
    assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
    assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
    assert(periodicBoxVectors[2][2] >= 2.0*cutoffDistance);
117
    periodic = true;
118
119
120
121
122
123
124
125
126
127
128
    this->periodicBoxVectors[0] = periodicBoxVectors[0];
    this->periodicBoxVectors[1] = periodicBoxVectors[1];
    this->periodicBoxVectors[2] = periodicBoxVectors[2];
    recipBoxSize[0] = (float) (1.0/periodicBoxVectors[0][0]);
    recipBoxSize[1] = (float) (1.0/periodicBoxVectors[1][1]);
    recipBoxSize[2] = (float) (1.0/periodicBoxVectors[2][2]);
    periodicBoxVec4.resize(3);
    periodicBoxVec4[0] = fvec4(periodicBoxVectors[0][0], periodicBoxVectors[0][1], periodicBoxVectors[0][2], 0);
    periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
    periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
    triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
129
130
131
            periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
            periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
132

133
/**---------------------------------------------------------------------------------------
134
135
136
137
138
139
140
141
142
143

     Set the force to use Ewald summation.

     @param alpha  the Ewald separation parameter
     @param kmaxx  the largest wave vector in the x direction
     @param kmaxy  the largest wave vector in the y direction
     @param kmaxz  the largest wave vector in the z direction

     --------------------------------------------------------------------------------------- */

144
145
146
147
148
149
150
151
152
153
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
    if (alpha != alphaEwald)
        tableIsValid = false;
    alphaEwald = alpha;
    numRx = kmaxx;
    numRy = kmaxy;
    numRz = kmaxz;
    ewald = true;
    tabulateEwaldScaleFactor();
}
154

155
/**---------------------------------------------------------------------------------------
156
157
158
159
160
161
162
163

     Set the force to use Particle-Mesh Ewald (PME) summation.

     @param alpha  the Ewald separation parameter
     @param gridSize the dimensions of the mesh

     --------------------------------------------------------------------------------------- */

164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
    if (alpha != alphaEwald)
        tableIsValid = false;
    alphaEwald = alpha;
    meshDim[0] = meshSize[0];
    meshDim[1] = meshSize[1];
    meshDim[2] = meshSize[2];
    pme = true;
    tabulateEwaldScaleFactor();
}


/**---------------------------------------------------------------------------------------

     Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.

     @param alpha  the Ewald separation parameter
     @param gridSize the dimensions of the mesh

     --------------------------------------------------------------------------------------- */

void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
    if (alpha != alphaDispersionEwald)
        expTableIsValid = false;
    alphaDispersionEwald = alpha;
    dispersionMeshDim[0] = meshSize[0];
    dispersionMeshDim[1] = meshSize[1];
    dispersionMeshDim[2] = meshSize[2];
    ljpme = true;
    tabulateExpTerms();
    if(cutoffDistance != 0.0f){
        // We set this here, in case setUseLJPME is called after the cutoff is set
        double dalphaR = alphaDispersionEwald*cutoffDistance;
        double dar2 = dalphaR * dalphaR;
        double dar4 = dar2*dar2;
        double dar6 = dar4*dar2;
        double expterm = EXP(-dar2);
        inverseRcut6Expterm  = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
    }
}


void CpuNonbondedForce::tabulateEwaldScaleFactor() {
207
208
209
    if (tableIsValid)
        return;
    tableIsValid = true;
peastman's avatar
peastman committed
210
    ewaldDX = cutoffDistance/NUM_TABLE_POINTS;
peastman's avatar
peastman committed
211
    ewaldDXInv = 1.0f/ewaldDX;
212
213
    erfcDXInv = 1.0f/(ewaldDX*alphaEwald);
    erfcTable.resize(NUM_TABLE_POINTS+4);
peastman's avatar
peastman committed
214
215
    ewaldScaleTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
216
        double r = i*ewaldDX;
peastman's avatar
peastman committed
217
        double alphaR = alphaEwald*r;
218
219
        erfcTable[i] = erfc(alphaR);
        ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
peastman's avatar
peastman committed
220
221
    }
}
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

void CpuNonbondedForce::tabulateExpTerms() {
    if (expTableIsValid)
        return;
    expTableIsValid = true;
    exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
    exptermsDXInv = 1.0f/exptermsDX;
    exptermsTable.resize(NUM_TABLE_POINTS+4);
    dExptermsTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
        double r = i*ewaldDX;
        double dalphaR = alphaDispersionEwald*r;
        double dar2 = dalphaR * dalphaR;
        double dar4 = dar2*dar2;
        double dar6 = dar4*dar2;
        double expterm = EXP(-dar2);
        exptermsTable[i]  = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
        dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
    }
}

peastman's avatar
peastman committed
243
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates,
244
                                               const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
peastman's avatar
peastman committed
245
                                               vector<Vec3>& forces, double* totalEnergy) const {
246
247
248
249
    typedef std::complex<float> d_complex;

    static const float epsilon     =  1.0;

250
    int kmax                       = (ewald ? max(numRx, max(numRy,numRz)) : 0);
peastman's avatar
peastman committed
251
    float factorEwald              = -1 / (4*alphaEwald*alphaEwald);
252
    float TWO_PI                   = 2.0 * PI_M;
253
    float recipCoeff               = (float)(ONE_4PI_EPS0*4*PI_M/(periodicBoxVectors[0][0] * periodicBoxVectors[1][1] * periodicBoxVectors[2][2]) /epsilon);
254

peastman's avatar
peastman committed
255
256
257
    if (pme) {
        pme_t pmedata;
        pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1);
peastman's avatar
peastman committed
258
        vector<double> charges(numberOfAtoms);
peastman's avatar
peastman committed
259
260
        for (int i = 0; i < numberOfAtoms; i++)
            charges[i] = posq[4*i+3];
peastman's avatar
peastman committed
261
        double recipEnergy = 0.0;
262
        pme_exec(pmedata, atomCoordinates, forces, charges, periodicBoxVectors, &recipEnergy);
peastman's avatar
peastman committed
263
264
        if (totalEnergy)
            *totalEnergy += recipEnergy;
265
        pme_destroy(pmedata);
266
267
268
269
270

        if (ljpme) {
            // Dispersion reciprocal space terms
            pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);

peastman's avatar
peastman committed
271
            std::vector<Vec3> dpmeforces;
272
            for (int i = 0; i < numberOfAtoms; i++){
peastman's avatar
peastman committed
273
274
                charges[i] = C6params[i];
                dpmeforces.push_back(Vec3());
275
            }
peastman's avatar
peastman committed
276
            double recipDispersionEnergy = 0.0;
277
278
            pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
            for (int i = 0; i < numberOfAtoms; i++){
peastman's avatar
peastman committed
279
280
281
                forces[i][0] += dpmeforces[i][0];
                forces[i][1] += dpmeforces[i][1];
                forces[i][2] += dpmeforces[i][2];
282
283
284
285
286
287
288
            }
            if (totalEnergy)
                *totalEnergy += recipDispersionEnergy;

            pme_destroy(pmedata);
        }

peastman's avatar
peastman committed
289
    }
290
291
292

    // Ewald method

peastman's avatar
peastman committed
293
    else if (ewald) {
294

peastman's avatar
peastman committed
295
        // setup reciprocal box
296

peastman's avatar
peastman committed
297
        float recipBoxSize[3] = {(float) (TWO_PI/periodicBoxVectors[0][0]), (float) (TWO_PI/periodicBoxVectors[1][1]), (float) (TWO_PI/periodicBoxVectors[2][2])};
298
299


peastman's avatar
peastman committed
300
        // setup K-vectors
301

302
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
peastman's avatar
peastman committed
303
304
305
        vector<d_complex> eir(kmax*numberOfAtoms*3);
        vector<d_complex> tab_xy(numberOfAtoms);
        vector<d_complex> tab_qxyz(numberOfAtoms);
306

peastman's avatar
peastman committed
307
308
309
        for (int i = 0; (i < numberOfAtoms); i++) {
            float* pos = posq+4*i;
            for (int m = 0; (m < 3); m++)
310
                EIR(0, i, m) = d_complex(1,0);
311

peastman's avatar
peastman committed
312
            for (int m=0; (m<3); m++)
313
314
                EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
                                         sin(pos[m]*recipBoxSize[m]));
315

peastman's avatar
peastman committed
316
            for (int j=2; (j<kmax); j++)
317
318
                for (int m=0; (m<3); m++)
                    EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
319
320
        }

peastman's avatar
peastman committed
321
322
323
324
325
326
327
328
329
330
331
        // calculate reciprocal space energy and forces

        int lowry = 0;
        int lowrz = 1;

        for (int rx = 0; rx < numRx; rx++) {
            float kx = rx * recipBoxSize[0];
            for (int ry = lowry; ry < numRy; ry++) {
                float ky = ry * recipBoxSize[1];
                if (ry >= 0) {
                    for (int n = 0; n < numberOfAtoms; n++)
332
                        tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
peastman's avatar
peastman committed
333
334
335
                }
                else {
                    for (int n = 0; n < numberOfAtoms; n++)
336
                        tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
peastman's avatar
peastman committed
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
                }
                for (int rz = lowrz; rz < numRz; rz++) {
                    if (rz >= 0) {
                        for (int n = 0; n < numberOfAtoms; n++)
                            tab_qxyz[n] = posq[4*n+3] * (tab_xy[n] * EIR(rz, n, 2));
                    }
                    else {
                        for (int n = 0; n < numberOfAtoms; n++)
                            tab_qxyz[n] = posq[4*n+3] * (tab_xy[n] * conj(EIR(-rz, n, 2)));
                    }
                    float cs = 0.0f;
                    float ss = 0.0f;

                    for (int n = 0; n < numberOfAtoms; n++) {
                        cs += tab_qxyz[n].real();
                        ss += tab_qxyz[n].imag();
                    }

                    float kz = rz * recipBoxSize[2];
                    float k2 = kx * kx + ky * ky + kz * kz;
                    float ak = exp(k2*factorEwald) / k2;

                    for (int n = 0; n < numberOfAtoms; n++) {
                        float force = ak * (cs * tab_qxyz[n].imag() - ss * tab_qxyz[n].real());
                        forces[n][0] += 2 * recipCoeff * force * kx;
                        forces[n][1] += 2 * recipCoeff * force * ky;
                        forces[n][2] += 2 * recipCoeff * force * kz;
                    }

                    if (totalEnergy)
                        *totalEnergy += recipCoeff * ak * (cs * cs + ss * ss);

                    lowrz = 1 - numRz;
                }
                lowry = 1 - numRy;
            }
373
374
        }
    }
peastman's avatar
peastman committed
375
}
376
377


peastman's avatar
peastman committed
378
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
379
                                           const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
380
381
    // Record the parameters for the threads.
    
peastman's avatar
peastman committed
382
    this->numberOfAtoms = numberOfAtoms;
383
    this->posq = posq;
384
    this->atomCoordinates = &atomCoordinates[0];
peastman's avatar
peastman committed
385
    this->atomParameters = &atomParameters[0];
386
    this->C6params = &C6params[0];
peastman's avatar
peastman committed
387
    this->exclusions = &exclusions[0];
388
    this->threadForce = &threadForce;
389
    includeEnergy = (totalEnergy != NULL);
390
    threadEnergy.resize(threads.getNumThreads());
peastman's avatar
peastman committed
391
    atomicCounter = 0;
392
393
394
    
    // Signal the threads to start running and wait for them to finish.
    
peastman's avatar
peastman committed
395
    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
396
    threads.waitForThreads();
397
    
peastman's avatar
peastman committed
398
399
400
    // Signal the threads to subtract the exclusions.
    
    if (ewald || pme) {
peastman's avatar
peastman committed
401
        atomicCounter = 0;
peastman's avatar
peastman committed
402
403
404
405
        threads.resumeThreads();
        threads.waitForThreads();
    }
    
406
    // Combine the energies from all the threads.
407
    
408
409
410
411
412
    if (totalEnergy != NULL) {
        double directEnergy = 0;
        int numThreads = threads.getNumThreads();
        for (int i = 0; i < numThreads; i++)
            directEnergy += threadEnergy[i];
413
        *totalEnergy += directEnergy;
414
    }
415
416
}

417
418
419
420
421
422
void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex) {
    // Compute this thread's subset of interactions.

    int numThreads = threads.getNumThreads();
    threadEnergy[threadIndex] = 0;
    double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL);
423
    float* forces = &(*threadForce)[threadIndex][0];
424
425
    fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
    fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
426
    if (ewald || pme || ljpme) {
427
        // Compute the interactions from the neighbor list.
428
        while (true) {
peastman's avatar
peastman committed
429
            int nextBlock = atomicCounter++;
430
431
432
433
            if (nextBlock >= neighborList->getNumBlocks())
                break;
            calculateBlockEwaldIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
        }
434
435

        // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
436

peastman's avatar
peastman committed
437
438
439
        threads.syncThreads();
        const int groupSize = max(1, numberOfAtoms/(10*numThreads));
        while (true) {
peastman's avatar
peastman committed
440
            int start = atomicCounter.fetch_add(groupSize);
peastman's avatar
peastman committed
441
442
443
444
            if (start >= numberOfAtoms)
                break;
            int end = min(start+groupSize, numberOfAtoms);
            for (int i = start; i < end; i++) {
445
                fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
peastman's avatar
peastman committed
446
                float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
peastman's avatar
peastman committed
447
448
449
                for (int excluded : exclusions[i]) {
                    if (excluded > i) {
                        int j = excluded;
peastman's avatar
peastman committed
450
                        fvec4 deltaR;
peastman's avatar
peastman committed
451
                        fvec4 posJ((float) atomCoordinates[j][0], (float) atomCoordinates[j][1], (float) atomCoordinates[j][2], 0.0f);
peastman's avatar
peastman committed
452
453
454
455
456
457
458
459
460
                        float r2;
                        getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
                        float r = sqrtf(r2);
                        float alphaR = alphaEwald*r;
                        float erfAlphaR = erf(alphaR);
                        if (erfAlphaR > 1e-6f) {
                            float inverseR = 1/r;
                            float chargeProdOverR = scaledChargeI*posq[4*j+3]*inverseR;
                            float dEdR = chargeProdOverR*inverseR*inverseR;
461
                            dEdR = dEdR * (erfAlphaR-TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR));
peastman's avatar
peastman committed
462
463
464
465
466
467
                            fvec4 result = deltaR*dEdR;
                            (fvec4(forces+4*i)-result).store(forces+4*i);
                            (fvec4(forces+4*j)+result).store(forces+4*j);
                            if (includeEnergy)
                                threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
                        }
468
                        else if (includeEnergy)
469
470
471
472
473
474
475
476
477
478
479
480
                            threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
                        if (ljpme) {
                            float C6ij = C6params[i]*C6params[j];
                            float inverseR2 = 1.0f/r2;
                            float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
                            if(includeEnergy)
                                threadEnergy[threadIndex] += emult;
                            float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
                            fvec4 result = deltaR*dEdR;
                            (fvec4(forces+4*i)-result).store(forces+4*i);
                            (fvec4(forces+4*j)+result).store(forces+4*j);
                        }
481
                    }
482
                }
483
            }
484
        }
485
486
487
    }
    else if (cutoff) {
        // Compute the interactions from the neighbor list.
488

489
        while (true) {
peastman's avatar
peastman committed
490
            int nextBlock = atomicCounter++;
491
492
493
494
            if (nextBlock >= neighborList->getNumBlocks())
                break;
            calculateBlockIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
        }
495
496
497
    }
    else {
        // Loop over all atom pairs
498

499
        while (true) {
peastman's avatar
peastman committed
500
            int i = atomicCounter++;
501
502
            if (i >= numberOfAtoms)
                break;
503
504
505
            for (int j = i+1; j < numberOfAtoms; j++)
                if (exclusions[j].find(i) == exclusions[j].end())
                    calculateOneIxn(i, j, forces, energyPtr, boxSize, invBoxSize);
peastman's avatar
peastman committed
506
507
        }
    }
508
509
}

510
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
511
512
    // get deltaR, R2, and R between 2 atoms

513
514
515
    fvec4 deltaR;
    fvec4 posI(posq+4*ii);
    fvec4 posJ(posq+4*jj);
516
    float r2;
peastman's avatar
peastman committed
517
    getDeltaR(posJ, posI, deltaR, r2, periodic, boxSize, invBoxSize);
518
519
    if (cutoff && r2 >= cutoffDistance*cutoffDistance)
        return;
520
521
    float r = sqrtf(r2);
    float inverseR = 1/r;
522
    float switchValue = 1, switchDeriv = 0;
523
524
525
526
    if (useSwitch && r > switchingDistance) {
        float t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
        switchValue = 1+t*t*t*(-10+t*(15-t*6));
        switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
527
    }
peastman's avatar
peastman committed
528
    float sig       = atomParameters[ii].first + atomParameters[jj].first;
529
    float sig2      = inverseR*sig;
530
    sig2     *= sig2;
531
532
    float sig6      = sig2*sig2*sig2;

peastman's avatar
peastman committed
533
    float eps       = atomParameters[ii].second*atomParameters[jj].second;
534
    float dEdR      = switchValue*eps*(12.0f*sig6 - 6.0f)*sig6;
peastman's avatar
peastman committed
535
    float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
536
    if (cutoff)
537
        dEdR += (float) (chargeProd*(inverseR-2.0f*krf*r2));
538
    else
539
        dEdR += (float) (chargeProd*inverseR);
peastman's avatar
peastman committed
540
    dEdR *= inverseR*inverseR;
541
    float energy = eps*(sig6-1.0f)*sig6;
542
543
544
545
546
    if (useSwitch) {
        dEdR -= energy*switchDeriv*inverseR;
        energy *= switchValue;
    }

547
    // accumulate energies
548

549
550
551
552
553
554
    if (totalEnergy) {
        if (cutoff)
            energy += (float) (chargeProd*(inverseR+krf*r2-crf));
        else
            energy += (float) (chargeProd*inverseR);
        *totalEnergy += energy;
555
556
    }

557
    // accumulate forces
558

559
560
561
    fvec4 result = deltaR*dEdR;
    (fvec4(forces+4*ii)+result).store(forces+4*ii);
    (fvec4(forces+4*jj)-result).store(forces+4*jj);
562
}
563

564
565
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
    deltaR = posJ-posI;
566
    if (periodic) {
567
        if (triclinic) {
568
569
570
            deltaR -= periodicBoxVec4[2]*floorf(deltaR[2]*recipBoxSize[2]+0.5f);
            deltaR -= periodicBoxVec4[1]*floorf(deltaR[1]*recipBoxSize[1]+0.5f);
            deltaR -= periodicBoxVec4[0]*floorf(deltaR[0]*recipBoxSize[0]+0.5f);
571
572
573
574
575
        }
        else {
            fvec4 base = round(deltaR*invBoxSize)*boxSize;
            deltaR = deltaR-base;
        }
576
    }
577
    r2 = dot3(deltaR, deltaR);
578
}
peastman's avatar
peastman committed
579

580
float CpuNonbondedForce::erfcApprox(float x) {
581
582
583
584
585
    float x1 = x*erfcDXInv;
    int index = min((int) floor(x1), NUM_TABLE_POINTS);
    float coeff2 = x1-index;
    float coeff1 = 1.0f-coeff2;
    return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
peastman's avatar
peastman committed
586
}
peastman's avatar
peastman committed
587

588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
float CpuNonbondedForce::exptermsApprox(float x) {
    float x1 = x*exptermsDXInv;
    int index = min((int) floor(x1), NUM_TABLE_POINTS);
    float coeff2 = x1-index;
    float coeff1 = 1.0f-coeff2;
    return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}

float CpuNonbondedForce::dExptermsApprox(float x) {
    float x1 = x*exptermsDXInv;
    int index = min((int) floor(x1), NUM_TABLE_POINTS);
    float coeff2 = x1-index;
    float coeff1 = 1.0f-coeff2;
    return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}