// TestCudaNonbondedForce.cpp
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2008 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

/**
 * This tests all the different force terms in the CUDA implementation of NonbondedForce.
 */

#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "ReferencePlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/internal/ContextImpl.h"
#include "kernels/gputypes.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/sfmt/SFMT.h"
#include <algorithm>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>

using namespace OpenMM;
using namespace std;

// Tolerance passed to the ASSERT_EQUAL_TOL / ASSERT_EQUAL_VEC checks in this file, and used as
// slack in the bounding-box comparisons, unless a test defines its own tolerance.
const double TOL = 1e-5;

void testCoulomb() {
    CudaPlatform platform;
59
60
61
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
62
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
63
64
65
    NonbondedForce* forceField = new NonbondedForce();
    forceField->addParticle(0.5, 1, 0);
    forceField->addParticle(-1.5, 1, 0);
66
    system.addForce(forceField);
67
    Context context(system, integrator, platform);
68
69
70
71
72
73
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(2, 0, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();
74
    double force = ONE_4PI_EPS0*(-0.75)/4.0;
75
76
    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
77
    ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(-0.75)/2.0, state.getPotentialEnergy(), TOL);
78
79
80
81
}

void testLJ() {
    CudaPlatform platform;
82
83
84
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
85
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
86
87
88
    NonbondedForce* forceField = new NonbondedForce();
    forceField->addParticle(0, 1.2, 1);
    forceField->addParticle(0, 1.4, 2);
89
    system.addForce(forceField);
90
    Context context(system, integrator, platform);
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(2, 0, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();
    double x = 1.3/2.0;
    double eps = SQRT_TWO;
    double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/2.0;
    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
    ASSERT_EQUAL_TOL(4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0)), state.getPotentialEnergy(), TOL);
}

void testExclusionsAnd14() {
    CudaPlatform platform;
107
    System system;
108
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
109
    NonbondedForce* nonbonded = new NonbondedForce();
110
111
    for (int i = 0; i < 5; ++i) {
        system.addParticle(1.0);
112
        nonbonded->addParticle(0, 1.5, 0);
113
    }
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
    vector<pair<int, int> > bonds;
    bonds.push_back(pair<int, int>(0, 1));
    bonds.push_back(pair<int, int>(1, 2));
    bonds.push_back(pair<int, int>(2, 3));
    bonds.push_back(pair<int, int>(3, 4));
    nonbonded->createExceptionsFromBonds(bonds, 0.0, 0.0);
    int first14, second14;
    for (int i = 0; i < nonbonded->getNumExceptions(); i++) {
        int particle1, particle2;
        double chargeProd, sigma, epsilon;
        nonbonded->getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
        if ((particle1 == 0 && particle2 == 3) || (particle1 == 3 && particle2 == 0))
            first14 = i;
        if ((particle1 == 1 && particle2 == 4) || (particle1 == 4 && particle2 == 1))
            second14 = i;
    }
130
    system.addForce(nonbonded);
131
132
133
134
135
136
137
    for (int i = 1; i < 5; ++i) {
 
        // Test LJ forces
        
        vector<Vec3> positions(5);
        const double r = 1.0;
        for (int j = 0; j < 5; ++j) {
Peter Eastman's avatar
Peter Eastman committed
138
            nonbonded->setParticleParameters(j, 0, 1.5, 0);
139
140
            positions[j] = Vec3(0, j, 0);
        }
Peter Eastman's avatar
Peter Eastman committed
141
142
        nonbonded->setParticleParameters(0, 0, 1.5, 1);
        nonbonded->setParticleParameters(i, 0, 1.5, 1);
143
144
        nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0);
        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0);
145
        positions[i] = Vec3(r, 0, 0);
146
        Context context(system, integrator, platform);
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
        context.setPositions(positions);
        State state = context.getState(State::Forces | State::Energy);
        const vector<Vec3>& forces = state.getForces();
        double x = 1.5/r;
        double eps = 1.0;
        double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r;
        double energy = 4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0));
        if (i == 3) {
            force *= 0.5;
            energy *= 0.5;
        }
        if (i < 3) {
            force = 0;
            energy = 0;
        }
        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);

        // Test Coulomb forces
        
Peter Eastman's avatar
Peter Eastman committed
168
169
        nonbonded->setParticleParameters(0, 2, 1.5, 0);
        nonbonded->setParticleParameters(i, 2, 1.5, 0);
170
171
        nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? 4/1.2 : 0, 1.5, 0);
        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0);
172
        Context context2(system, integrator, platform);
173
174
175
        context2.setPositions(positions);
        state = context2.getState(State::Forces | State::Energy);
        const vector<Vec3>& forces2 = state.getForces();
176
177
        force = ONE_4PI_EPS0*4/(r*r);
        energy = ONE_4PI_EPS0*4/r;
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
        if (i == 3) {
            force /= 1.2;
            energy /= 1.2;
        }
        if (i < 3) {
            force = 0;
            energy = 0;
        }
        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
    }
}

void testCutoff() {
    CudaPlatform platform;
194
195
196
197
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    system.addParticle(1.0);
198
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
199
200
201
202
    NonbondedForce* forceField = new NonbondedForce();
    forceField->addParticle(1.0, 1, 0);
    forceField->addParticle(1.0, 1, 0);
    forceField->addParticle(1.0, 1, 0);
203
    forceField->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
204
205
    const double cutoff = 2.9;
    forceField->setCutoffDistance(cutoff);
206
207
    const double eps = 50.0;
    forceField->setReactionFieldDielectric(eps);
208
    system.addForce(forceField);
209
    Context context(system, integrator, platform);
210
211
212
213
214
215
216
217
218
    vector<Vec3> positions(3);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 2, 0);
    positions[2] = Vec3(0, 3, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();
    const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
    const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
219
220
    const double force1 = ONE_4PI_EPS0*(1.0)*(0.25-2.0*krf*2.0);
    const double force2 = ONE_4PI_EPS0*(1.0)*(1.0-2.0*krf*1.0);
221
222
223
    ASSERT_EQUAL_VEC(Vec3(0, -force1, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, force1-force2, 0), forces[1], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, force2, 0), forces[2], TOL);
224
225
    const double energy1 = ONE_4PI_EPS0*(1.0)*(0.5+krf*4.0-crf);
    const double energy2 = ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf);
226
227
228
229
230
    ASSERT_EQUAL_TOL(energy1+energy2, state.getPotentialEnergy(), TOL);
}

void testCutoff14() {
    CudaPlatform platform;
231
    System system;
232
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
233
    NonbondedForce* nonbonded = new NonbondedForce();
234
    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
235
236
    for (int i = 0; i < 5; ++i) {
        system.addParticle(1.0);
237
        nonbonded->addParticle(0, 1.5, 0);
238
    }
239
    const double cutoff = 3.5;
240
    nonbonded->setCutoffDistance(cutoff);
241
242
    const double eps = 30.0;
    nonbonded->setReactionFieldDielectric(eps);
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
    vector<pair<int, int> > bonds;
    bonds.push_back(pair<int, int>(0, 1));
    bonds.push_back(pair<int, int>(1, 2));
    bonds.push_back(pair<int, int>(2, 3));
    bonds.push_back(pair<int, int>(3, 4));
    nonbonded->createExceptionsFromBonds(bonds, 0.0, 0.0);
    int first14, second14;
    for (int i = 0; i < nonbonded->getNumExceptions(); i++) {
        int particle1, particle2;
        double chargeProd, sigma, epsilon;
        nonbonded->getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
        if ((particle1 == 0 && particle2 == 3) || (particle1 == 3 && particle2 == 0))
            first14 = i;
        if ((particle1 == 1 && particle2 == 4) || (particle1 == 4 && particle2 == 1))
            second14 = i;
    }
259
    system.addForce(nonbonded);
260
    Context context(system, integrator, platform);
261
262
263
264
265
266
267
268
269
270
    vector<Vec3> positions(5);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(1, 0, 0);
    positions[2] = Vec3(2, 0, 0);
    positions[3] = Vec3(3, 0, 0);
    positions[4] = Vec3(4, 0, 0);
    for (int i = 1; i < 5; ++i) {
 
        // Test LJ forces
        
Peter Eastman's avatar
Peter Eastman committed
271
        nonbonded->setParticleParameters(0, 0, 1.5, 1);
272
        for (int j = 1; j < 5; ++j)
Peter Eastman's avatar
Peter Eastman committed
273
274
            nonbonded->setParticleParameters(j, 0, 1.5, 0);
        nonbonded->setParticleParameters(i, 0, 1.5, 1);
275
276
        nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0);
        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0);
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
        context.reinitialize();
        context.setPositions(positions);
        State state = context.getState(State::Forces | State::Energy);
        const vector<Vec3>& forces = state.getForces();
        double r = positions[i][0];
        double x = 1.5/r;
        double e = 1.0;
        double force = 4.0*e*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r;
        double energy = 4.0*e*(std::pow(x, 12.0)-std::pow(x, 6.0));
        if (i == 3) {
            force *= 0.5;
            energy *= 0.5;
        }
        if (i < 3 || r > cutoff) {
            force = 0;
            energy = 0;
        }
        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);

        // Test Coulomb forces
        
        const double q = 0.7;
Peter Eastman's avatar
Peter Eastman committed
301
302
        nonbonded->setParticleParameters(0, q, 1.5, 0);
        nonbonded->setParticleParameters(i, q, 1.5, 0);
303
304
        nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? q*q/1.2 : 0, 1.5, 0);
        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0);
305
306
307
308
309
310
        context.reinitialize();
        context.setPositions(positions);
        state = context.getState(State::Forces | State::Energy);
        const vector<Vec3>& forces2 = state.getForces();
        const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
        const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
311
312
        force = ONE_4PI_EPS0*q*q*(1.0/(r*r)-2.0*krf*r);
        energy = ONE_4PI_EPS0*q*q*(1.0/r+krf*r*r-crf);
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
        if (i == 3) {
            force /= 1.2;
            energy /= 1.2;
        }
        if (i < 3 || r > cutoff) {
            force = 0;
            energy = 0;
        }
        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
    }
}

void testPeriodic() {
    CudaPlatform platform;
329
330
331
332
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    system.addParticle(1.0);
333
    LangevinIntegrator integrator(0.0, 0.1, 0.01);
334
335
336
337
338
    NonbondedForce* nonbonded = new NonbondedForce();
    nonbonded->addParticle(1.0, 1, 0);
    nonbonded->addParticle(1.0, 1, 0);
    nonbonded->addParticle(1.0, 1, 0);
    nonbonded->addException(0, 1, 0.0, 1.0, 0.0);
339
    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
340
    const double cutoff = 2.0;
341
    nonbonded->setCutoffDistance(cutoff);
342
    system.setPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
343
    system.addForce(nonbonded);
344
    Context context(system, integrator, platform);
345
346
347
348
349
350
351
352
353
354
    vector<Vec3> positions(3);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(2, 0, 0);
    positions[2] = Vec3(3, 0, 0);
    context.setPositions(positions);
    State state = context.getState(State::Forces | State::Energy);
    const vector<Vec3>& forces = state.getForces();
    const double eps = 78.3;
    const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
    const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
355
    const double force = ONE_4PI_EPS0*(1.0)*(1.0-2.0*krf*1.0);
356
357
358
    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[0], TOL);
    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[1], TOL);
    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);
359
    ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
360
361
}

362

363
364
365
366
void testLargeSystem() {
    const int numMolecules = 600;
    const int numParticles = numMolecules*2;
    const double cutoff = 2.0;
367
    const double boxSize = 20.0;
368
369
370
    const double tol = 1e-3;
    CudaPlatform cuda;
    ReferencePlatform reference;
371
372
373
    System system;
    for (int i = 0; i < numParticles; i++)
        system.addParticle(1.0);
374
    VerletIntegrator integrator(0.01);
375
    NonbondedForce* nonbonded = new NonbondedForce();
376
    HarmonicBondForce* bonds = new HarmonicBondForce();
377
378
379
380
381
    vector<Vec3> positions(numParticles);
    vector<Vec3> velocities(numParticles);
    init_gen_rand(0);
    for (int i = 0; i < numMolecules; i++) {
        if (i < numMolecules/2) {
382
            nonbonded->addParticle(-1.0, 0.2, 0.1);
383
            nonbonded->addParticle(1.0, 0.1, 0.1);
384
385
        }
        else {
386
            nonbonded->addParticle(-1.0, 0.2, 0.2);
387
            nonbonded->addParticle(1.0, 0.1, 0.2);
388
389
390
391
392
        }
        positions[2*i] = Vec3(boxSize*genrand_real2(), boxSize*genrand_real2(), boxSize*genrand_real2());
        positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
        velocities[2*i] = Vec3(genrand_real2(), genrand_real2(), genrand_real2());
        velocities[2*i+1] = Vec3(genrand_real2(), genrand_real2(), genrand_real2());
393
        bonds->addBond(2*i, 2*i+1, 1.0, 0.1);
394
        nonbonded->addException(2*i, 2*i+1, 0.0, 0.15, 0.0);
395
396
397
398
399
400
401
402
403
    }

    // Try with cutoffs but not periodic boundary conditions, and make sure the Cuda and Reference
    // platforms agree.

    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
    nonbonded->setCutoffDistance(cutoff);
    system.addForce(nonbonded);
    system.addForce(bonds);
404
405
    Context cudaContext(system, integrator, cuda);
    Context referenceContext(system, integrator, reference);
406
407
408
409
    cudaContext.setPositions(positions);
    cudaContext.setVelocities(velocities);
    referenceContext.setPositions(positions);
    referenceContext.setVelocities(velocities);
410
411
    State cudaState = cudaContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
    State referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
412
413
414
415
416
    for (int i = 0; i < numParticles; i++) {
        ASSERT_EQUAL_VEC(cudaState.getPositions()[i], referenceState.getPositions()[i], tol);
        ASSERT_EQUAL_VEC(cudaState.getVelocities()[i], referenceState.getVelocities()[i], tol);
        ASSERT_EQUAL_VEC(cudaState.getForces()[i], referenceState.getForces()[i], tol);
    }
417
    ASSERT_EQUAL_TOL(cudaState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
418
419
420
421

    // Now do the same thing with periodic boundary conditions.

    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
422
    system.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
423
424
425
426
427
428
    cudaContext.reinitialize();
    referenceContext.reinitialize();
    cudaContext.setPositions(positions);
    cudaContext.setVelocities(velocities);
    referenceContext.setPositions(positions);
    referenceContext.setVelocities(velocities);
429
430
    cudaState = cudaContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
    referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
431
    for (int i = 0; i < numParticles; i++) {
432
433
434
        ASSERT_EQUAL_TOL(fmod(cudaState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
        ASSERT_EQUAL_TOL(fmod(cudaState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
        ASSERT_EQUAL_TOL(fmod(cudaState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
435
436
437
        ASSERT_EQUAL_VEC(cudaState.getVelocities()[i], referenceState.getVelocities()[i], tol);
        ASSERT_EQUAL_VEC(cudaState.getForces()[i], referenceState.getForces()[i], tol);
    }
438
    ASSERT_EQUAL_TOL(cudaState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
439
440
441
442
443
444
445
446
447
}

void testBlockInteractions(bool periodic) {
    const int blockSize = 32;
    const int numBlocks = 100;
    const int numParticles = blockSize*numBlocks;
    const double cutoff = 1.0;
    const double boxSize = (periodic ? 5.1 : 1.1);
    CudaPlatform cuda;
448
    System system;
449
    VerletIntegrator integrator(0.01);
450
    NonbondedForce* nonbonded = new NonbondedForce();
451
452
453
    vector<Vec3> positions(numParticles);
    init_gen_rand(0);
    for (int i = 0; i < numParticles; i++) {
454
        system.addParticle(1.0);
455
        nonbonded->addParticle(1.0, 0.2, 0.2);
456
457
458
459
        positions[i] = Vec3(boxSize*(3*genrand_real2()-1), boxSize*(3*genrand_real2()-1), boxSize*(3*genrand_real2()-1));
    }
    nonbonded->setNonbondedMethod(periodic ? NonbondedForce::CutoffPeriodic : NonbondedForce::CutoffNonPeriodic);
    nonbonded->setCutoffDistance(cutoff);
460
    system.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
461
    system.addForce(nonbonded);
462
    Context context(system, integrator, cuda);
463
464
    context.setPositions(positions);
    State state = context.getState(State::Positions | State::Velocities | State::Forces);
465
    ContextImpl* contextImpl = *reinterpret_cast<ContextImpl**>(&context);
466
467
468
469
470
471
472
473
    CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(contextImpl->getPlatformData());
    
    // Verify that the bounds of each block were calculated correctly.

    data.gpu->psPosq4->Download();
    data.gpu->psGridBoundingBox->Download();
    data.gpu->psGridCenter->Download();
    for (int i = 0; i < numBlocks; i++) {
474
475
        float4 gridSize = (*data.gpu->psGridBoundingBox)[i];
        float4 center = (*data.gpu->psGridCenter)[i];
476
477
478
479
480
481
482
        if (periodic) {
            ASSERT(gridSize.x < 0.5*boxSize);
            ASSERT(gridSize.y < 0.5*boxSize);
            ASSERT(gridSize.z < 0.5*boxSize);
        }
        float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
        for (int j = 0; j < blockSize; j++) {
483
            float4 pos = (*data.gpu->psPosq4)[i*blockSize+j];
484
485
486
487
            float dx = pos.x-center.x;
            float dy = pos.y-center.y;
            float dz = pos.z-center.z;
            if (periodic) {
488
489
490
                dx -= (float)(floor(0.5+dx/boxSize)*boxSize);
                dy -= (float)(floor(0.5+dy/boxSize)*boxSize);
                dz -= (float)(floor(0.5+dz/boxSize)*boxSize);
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
            }
            ASSERT(abs(dx) < gridSize.x+TOL);
            ASSERT(abs(dy) < gridSize.y+TOL);
            ASSERT(abs(dz) < gridSize.z+TOL);
            minx = min(minx, dx);
            maxx = max(maxx, dx);
            miny = min(miny, dy);
            maxy = max(maxy, dy);
            minz = min(minz, dz);
            maxz = max(maxz, dz);
        }
        ASSERT_EQUAL_TOL(-minx, gridSize.x, TOL);
        ASSERT_EQUAL_TOL(maxx, gridSize.x, TOL);
        ASSERT_EQUAL_TOL(-miny, gridSize.y, TOL);
        ASSERT_EQUAL_TOL(maxy, gridSize.y, TOL);
        ASSERT_EQUAL_TOL(-minz, gridSize.z, TOL);
        ASSERT_EQUAL_TOL(maxz, gridSize.z, TOL);
    }

    // Verify that interactions were identified correctly.

    data.gpu->psInteractionCount->Download();
513
    int numWithInteractions = (*data.gpu->psInteractionCount)[0];
514
515
516
517
518
519
520
    vector<bool> hasInteractions(data.gpu->sim.workUnits, false);
    data.gpu->psInteractingWorkUnit->Download();
    data.gpu->psInteractionFlag->Download();
    const unsigned int atoms = data.gpu->sim.paddedNumberOfAtoms;
    const unsigned int grid = data.gpu->grid;
    const unsigned int dim = (atoms+(grid-1))/grid;
    for (int i = 0; i < numWithInteractions; i++) {
521
        unsigned int workUnit = (*data.gpu->psInteractingWorkUnit)[i];
522
523
524
525
526
527
528
        unsigned int x = (workUnit >> 17);
        unsigned int y = ((workUnit >> 2) & 0x7fff);
        int tile = (x > y ? x+y*dim-y*(y+1)/2 : y+x*dim-x*(x+1)/2);
        hasInteractions[tile] = true;

        // Make sure this tile really should have been flagged based on bounding volumes.

529
530
531
532
        float4 gridSize1 = (*data.gpu->psGridBoundingBox)[x];
        float4 gridSize2 = (*data.gpu->psGridBoundingBox)[y];
        float4 center1 = (*data.gpu->psGridCenter)[x];
        float4 center2 = (*data.gpu->psGridCenter)[y];
533
534
535
536
        float dx = center1.x-center2.x;
        float dy = center1.y-center2.y;
        float dz = center1.z-center2.z;
        if (periodic) {
537
538
539
            dx -= (float)(floor(0.5+dx/boxSize)*boxSize);
            dy -= (float)(floor(0.5+dy/boxSize)*boxSize);
            dz -= (float)(floor(0.5+dz/boxSize)*boxSize);
540
541
542
543
544
        }
        dx = max(0.0f, abs(dx)-gridSize1.x-gridSize2.x);
        dy = max(0.0f, abs(dy)-gridSize1.y-gridSize2.y);
        dz = max(0.0f, abs(dz)-gridSize1.z-gridSize2.z);
        ASSERT(sqrt(dx*dx+dy*dy+dz*dz) < cutoff+TOL);
545
546
547

        // Check the interaction flags.

548
        unsigned int flags = (*data.gpu->psInteractionFlag)[i];
549
550
        for (int atom2 = 0; atom2 < 32; atom2++) {
            if ((flags & 1) == 0) {
551
                float4 pos2 = (*data.gpu->psPosq4)[y*blockSize+atom2];
552
                for (int atom1 = 0; atom1 < blockSize; ++atom1) {
553
                    float4 pos1 = (*data.gpu->psPosq4)[x*blockSize+atom1];
554
555
556
557
                    float dx = pos2.x-pos1.x;
                    float dy = pos2.y-pos1.y;
                    float dz = pos2.z-pos1.z;
                    if (periodic) {
558
559
560
                        dx -= (float)(floor(0.5+dx/boxSize)*boxSize);
                        dy -= (float)(floor(0.5+dy/boxSize)*boxSize);
                        dz -= (float)(floor(0.5+dz/boxSize)*boxSize);
561
562
563
564
565
566
                    }
                    ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
                }
            }
            flags >>= 1;
        }
567
568
569
570
571
    }

    // Check the tiles that did not have interactions to make sure all atoms are beyond the cutoff.

    data.gpu->psWorkUnit->Download();
572
    for (int i = 0; i < (int)hasInteractions.size(); i++)
573
        if (!hasInteractions[i]) {
574
            unsigned int workUnit = (*data.gpu->psWorkUnit)[i];
575
576
577
            unsigned int x = (workUnit >> 17);
            unsigned int y = ((workUnit >> 2) & 0x7fff);
            for (int atom1 = 0; atom1 < blockSize; ++atom1) {
578
                float4 pos1 = (*data.gpu->psPosq4)[x*blockSize+atom1];
579
                for (int atom2 = 0; atom2 < blockSize; ++atom2) {
580
                    float4 pos2 = (*data.gpu->psPosq4)[y*blockSize+atom2];
581
582
583
584
                    float dx = pos1.x-pos2.x;
                    float dy = pos1.y-pos2.y;
                    float dz = pos1.z-pos2.z;
                    if (periodic) {
585
586
587
                        dx -= (float)(floor(0.5+dx/boxSize)*boxSize);
                        dy -= (float)(floor(0.5+dy/boxSize)*boxSize);
                        dz -= (float)(floor(0.5+dz/boxSize)*boxSize);
588
589
590
591
592
593
594
                    }
                    ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
                }
            }
        }
}

595
596
597
598
599
int main() {
    try {
        testCoulomb();
        testLJ();
        testExclusionsAnd14();
600
601
602
603
604
605
        testCutoff();
        testCutoff14();
        testPeriodic();
        testLargeSystem();
        testBlockInteractions(false);
        testBlockInteractions(true);
606
607
608
609
610
611
612
613
    }
    catch(const exception& e) {
        cout << "exception: " << e.what() << endl;
        return 1;
    }
    cout << "Done" << endl;
    return 0;
}