CpuNeighborList.cpp 31.6 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2013-2022 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
40
41
42
43
44
45
46
47
#include <set>
#include <map>
#include <cmath>

using namespace std;

namespace OpenMM {

class VoxelIndex 
{
public:
48
    VoxelIndex() : y(0), z(0) {
49
    }
50
    VoxelIndex(int y, int z) : y(y), z(z) {
51
    }
52
    int y;
53
    int z;
54
55
};

56
57
58
59
60
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
61
public:
peastman's avatar
peastman committed
62
    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const Vec3* boxVectors, bool usePeriodic) :
63
64
            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
        for (int i = 0; i < 3; i++)
65
66
67
68
69
70
            for (int j = 0; j < 3; j++) {
                // Copying to a volatile temporary variable is a workaround for
                // a bug in GCC9 on PPC.
                volatile float temp = (float) boxVectors[i][j];
                periodicBoxVectors[i][j] = temp;
            }
71
72
73
74
75
76
77
78
79
        periodicBoxSize[0] = (float) boxVectors[0][0];
        periodicBoxSize[1] = (float) boxVectors[1][1];
        periodicBoxSize[2] = (float) boxVectors[2][2];
        recipBoxSize[0] = (float) (1/boxVectors[0][0]);
        recipBoxSize[1] = (float) (1/boxVectors[1][1]);
        recipBoxSize[2] = (float) (1/boxVectors[2][2]);
        triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
                     boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
                     boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
80
        if (usePeriodic) {
81
82
83
84
            ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
            nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
            voxelSizeY = boxVectors[1][1]/ny;
            voxelSizeZ = boxVectors[2][2]/nz;
85
86
        }
        else {
87
88
            ny = max(1, min(500, (int) floorf((maxy-miny)/voxelSizeY+0.5f)));
            nz = max(1, min(500, (int) floorf((maxz-minz)/voxelSizeZ+0.5f)));
89
90
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
91
92
            if (maxz > minz)
                voxelSizeZ = (maxz-minz)/nz;
93
        }
94
        bins.resize(ny);
95
        for (int i = 0; i < ny; i++)
96
            bins[i].resize(nz);
97
98
    }

99
100
101
102
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
103
        VoxelIndex voxelIndex = getVoxelIndex(location);
104
        bins[voxelIndex.y][voxelIndex.z].push_back(make_pair(location[0], atom));
105
106
107
    }
    
    /**
108
     * Sort the particles in each voxel by x coordinate.
109
110
     */
    void sortItems() {
111
112
        for (int i = 0; i < ny; i++)
            for (int j = 0; j < nz; j++)
113
                sort(bins[i][j].begin(), bins[i][j].end());
114
    }
115
    
116
    /**
117
     * Find the index of the first particle in voxel (y,z) whose x coordinate is >= the specified value.
118
     */
119
    int findLowerBound(int y, int z, double x, int lower, int upper) const {
120
        const vector<pair<float, int> >& bin = bins[y][z];
121
122
        while (lower < upper) {
            int middle = (lower+upper)/2;
123
            if (bin[middle].first < x)
124
125
126
127
128
129
130
131
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
132
     * Find the index of the first particle in voxel (y,z) whose x coordinate is greater than the specified value.
133
     */
134
    int findUpperBound(int y, int z, double x, int lower, int upper) const {
135
        const vector<pair<float, int> >& bin = bins[y][z];
136
137
        while (lower < upper) {
            int middle = (lower+upper)/2;
138
            if (bin[middle].first > x)
139
140
141
142
143
144
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
145

146
147
148
    /**
     * Get the voxel index containing a particular location.
     */
149
    VoxelIndex getVoxelIndex(const float* location) const {
150
        float yperiodic, zperiodic;
151
        if (!usePeriodic) {
152
            yperiodic = location[1]-miny;
153
            zperiodic = location[2]-minz;
154
155
        }
        else {
156
157
158
159
160
            float scale2 = floorf(location[2]*recipBoxSize[2]);
            yperiodic = location[1]-periodicBoxVectors[2][1]*scale2;
            zperiodic = location[2]-periodicBoxVectors[2][2]*scale2;
            float scale1 = floorf(yperiodic*recipBoxSize[1]);
            yperiodic -= periodicBoxVectors[1][0]*scale1;
161
        }
162
163
        int y = max(0, min(ny-1, int(floorf(yperiodic / voxelSizeY))));
        int z = max(0, min(nz-1, int(floorf(zperiodic / voxelSizeZ))));
164
        
165
        return VoxelIndex(y, z);
166
    }
167
        
168
    void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<CpuNeighborList::BlockExclusionMask>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
169
170
        neighbors.resize(0);
        exclusions.resize(0);
171
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
172
173
174
175
176
177
        fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
        fvec4 periodicBoxVec4[3];
        periodicBoxVec4[0] = fvec4(periodicBoxVectors[0][0], periodicBoxVectors[0][1], periodicBoxVectors[0][2], 0);
        periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
        periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);

178
        float maxDistanceSquared = maxDistance * maxDistance;
179
180
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
181

182
183
        int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY)+1; // How may voxels away do we have to look?
        int dIndexZ = int((maxDistance+blockWidth[2])/voxelSizeZ)+1;
peastman's avatar
peastman committed
184
185
        if (usePeriodic) {
            dIndexY = min(ny/2, dIndexY);
186
            dIndexZ = min(nz/2, dIndexZ);
peastman's avatar
peastman committed
187
        }
188
189
190
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
191
192
193
194
195
196
197

        // Loop over voxels along the z axis.

        int startz = centerVoxelIndex.z-dIndexZ;
        int endz = centerVoxelIndex.z+dIndexZ;
        if (usePeriodic)
            endz = min(endz, startz+nz-1);
198
        else {
199
200
            startz = max(startz, 0);
            endz = min(endz, nz-1);
201
        }
202
        int lastSortedIndex = blockSize*(blockIndex+1);
203
        VoxelIndex voxelIndex(0, 0);
204
205
        for (int z = startz; z <= endz; ++z) {
            voxelIndex.z = z;
206
            if (usePeriodic)
207
208
209
210
                voxelIndex.z = (z < 0 ? z+nz : (z >= nz ? z-nz : z));

            // Loop over voxels along the y axis.

211
            float boxz = floor((float) z/nz);
212
213
214
215
216
217
218
219
220
221
222
223
            int starty = centerVoxelIndex.y-dIndexY;
            int endy = centerVoxelIndex.y+dIndexY;
            float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0);
            if (usePeriodic) {
                starty -= (int) ceil(yoffset/voxelSizeY);
                endy -= (int) floor(yoffset/voxelSizeY);
                endy = min(endy, starty+ny-1);
            }
            else {
                starty = max(starty, 0);
                endy = min(endy, ny-1);
            }
224
            for (int y = starty; y <= endy; ++y) {
225
226
227
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
228
                float boxy = floor((float) y/ny);
229
230
231
232
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
233
234
                float minx = centerPos[0];
                float maxx = centerPos[0];
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
                if (usePeriodic && triclinic) {
                    for (int k = 0; k < (int) blockAtoms.size(); k++) {
                        const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
                        fvec4 delta1(0, voxelSizeY*voxelIndex.y-atomPos[1], voxelSizeZ*voxelIndex.z-atomPos[2], 0);
                        fvec4 delta2 = delta1+fvec4(0, voxelSizeY, 0, 0);
                        fvec4 delta3 = delta1+fvec4(0, 0, voxelSizeZ, 0);
                        fvec4 delta4 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
                        delta1 -= periodicBoxVec4[2]*floorf(delta1[2]*recipBoxSize[2]+0.5f);
                        delta1 -= periodicBoxVec4[1]*floorf(delta1[1]*recipBoxSize[1]+0.5f);
                        delta1 -= periodicBoxVec4[0]*floorf(delta1[0]*recipBoxSize[0]+0.5f);
                        delta2 -= periodicBoxVec4[2]*floorf(delta2[2]*recipBoxSize[2]+0.5f);
                        delta2 -= periodicBoxVec4[1]*floorf(delta2[1]*recipBoxSize[1]+0.5f);
                        delta2 -= periodicBoxVec4[0]*floorf(delta2[0]*recipBoxSize[0]+0.5f);
                        delta3 -= periodicBoxVec4[2]*floorf(delta3[2]*recipBoxSize[2]+0.5f);
                        delta3 -= periodicBoxVec4[1]*floorf(delta3[1]*recipBoxSize[1]+0.5f);
                        delta3 -= periodicBoxVec4[0]*floorf(delta3[0]*recipBoxSize[0]+0.5f);
                        delta4 -= periodicBoxVec4[2]*floorf(delta4[2]*recipBoxSize[2]+0.5f);
                        delta4 -= periodicBoxVec4[1]*floorf(delta4[1]*recipBoxSize[1]+0.5f);
                        delta4 -= periodicBoxVec4[0]*floorf(delta4[0]*recipBoxSize[0]+0.5f);
                        if (delta1[1] < 0 && delta1[1]+voxelSizeY > 0)
                            delta1 = fvec4(delta1[0], 0, delta1[2], 0);
                        if (delta1[2] < 0 && delta1[2]+voxelSizeZ > 0)
                            delta1 = fvec4(delta1[0], delta1[1], 0, 0);
                        if (delta3[1] < 0 && delta3[1]+voxelSizeY > 0)
                            delta3 = fvec4(delta3[0], 0, delta3[2], 0);
                        if (delta2[2] < 0 && delta2[2]+voxelSizeZ > 0)
                            delta2 = fvec4(delta2[0], delta2[1], 0, 0);
                        fvec4 delta = min(min(min(abs(delta1), abs(delta2)), abs(delta3)), abs(delta4));
                        float dy = (voxelIndex.y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                        float dz = (voxelIndex.z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
                        float dist2 = maxDistanceSquared-dy*dy-dz*dz;
                        if (dist2 > 0) {
                            float dist = sqrtf(dist2);
                            minx = min(minx, atomPos[0]-dist-max(max(max(delta1[0], delta2[0]), delta3[0]), delta4[0]));
                            maxx = max(maxx, atomPos[0]+dist-min(min(min(delta1[0], delta2[0]), delta3[0]), delta4[0]));
                        }
271
                    }
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
                }
                else {
                    float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
                    fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
                    for (int k = 0; k < (int) blockAtoms.size(); k++) {
                        const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
                        fvec4 posVec(atomPos);
                        fvec4 delta1 = offset-posVec;
                        fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
                        if (usePeriodic) {
                            delta1 -= round(delta1*invBoxSize)*boxSize;
                            delta2 -= round(delta2*invBoxSize)*boxSize;
                        }
                        fvec4 delta = min(abs(delta1), abs(delta2));
                        float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                        float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
                        float dist2 = maxDistanceSquared-dy*dy-dz*dz;
                        if (dist2 > 0) {
                            float dist = sqrtf(dist2);
                            minx = min(minx, atomPos[0]-dist-xoffset);
                            maxx = max(maxx, atomPos[0]+dist-xoffset);
                        }
294
295
                    }
                }
296
                if (minx == maxx)
peastman's avatar
peastman committed
297
                    continue;
298
299
300
                bool needPeriodic = usePeriodic && (centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
                                                    centerPos[2]-blockWidth[2] < maxDistance || centerPos[2]+blockWidth[2] > periodicBoxSize[2]-maxDistance ||
                                                    minx < 0.0f || maxx > periodicBoxVectors[0][0]);
301
                int numRanges;
302
303
                int rangeStart[2];
                int rangeEnd[2];
304
305
                int binSize = bins[voxelIndex.y][voxelIndex.z].size();
                rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx, 0, binSize);
306
307
                if (needPeriodic) {
                    numRanges = 2;
308
309
310
311
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
                    if (rangeStart[0] > 0 && rangeEnd[0] < binSize)
                        numRanges = 1;
                    else if (rangeStart[0] > 0) {
312
                        rangeStart[1] = 0;
313
                        rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0], 0, rangeStart[0]), rangeStart[0]);
314
315
                    }
                    else {
316
                        rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0], rangeEnd[0], binSize), rangeEnd[0]);
317
                        rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size();
318
319
                    }
                }
320
321
                else {
                    numRanges = 1;
322
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
323
                }
324
                bool periodicRectangular = (needPeriodic && !triclinic);
325
326
327
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
328
                const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
329
330
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
331
                        const int sortedIndex = voxelBins[item].second;
332

peastman's avatar
peastman committed
333
                        // Avoid duplicate entries.
334
                        if (sortedIndex >= lastSortedIndex)
335
                            continue;
336
                        
337
                        fvec4 atomPos(&sortedPositions[4*sortedIndex]);
338
                        fvec4 delta = atomPos-blockCenter;
339
340
                        if (periodicRectangular)
                            delta -= round(delta*invBoxSize)*boxSize;
341
342
343
344
345
                        else if (needPeriodic) {
                            delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
                            delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
                            delta -= periodicBoxVec4[0]*floorf(delta[0]*recipBoxSize[0]+0.5f);
                        }
346
                        delta = max(0.0f, abs(delta)-blockWidth);
347
                        float dSquared = dot3(delta, delta);
348
349
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
350
                        
351
352
353
354
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
355
356
357
358
359
                            bool anyInteraction = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
                                fvec4 dx = fvec4(&blockAtomX[k])-atomPos[0];
                                fvec4 dy = fvec4(&blockAtomY[k])-atomPos[1];
                                fvec4 dz = fvec4(&blockAtomZ[k])-atomPos[2];
360
                                if (periodicRectangular) {
361
362
363
                                    dx -= round(dx*invBoxSize[0])*boxSize[0];
                                    dy -= round(dy*invBoxSize[1])*boxSize[1];
                                    dz -= round(dz*invBoxSize[2])*boxSize[2];
364
                                }
365
                                else if (needPeriodic) {
366
367
368
369
370
371
372
373
374
                                    fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
                                    dx -= scale3*periodicBoxVectors[2][0];
                                    dy -= scale3*periodicBoxVectors[2][1];
                                    dz -= scale3*periodicBoxVectors[2][2];
                                    fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
                                    dx -= scale2*periodicBoxVectors[1][0];
                                    dy -= scale2*periodicBoxVectors[1][1];
                                    fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
                                    dx -= scale1*periodicBoxVectors[0][0];
375
                                }
376
377
378
                                fvec4 r2 = dx*dx + dy*dy + dz*dz;
                                if (any(r2 < maxDistanceSquared)) {
                                    anyInteraction = true;
379
380
381
                                    break;
                                }
                            }
382
                            if (!anyInteraction)
383
384
385
386
387
388
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
389
                        if (sortedIndex < blockSize*blockIndex)
390
                            exclusions.push_back(0);
391
392
393
394
                        else {
                            int mask = (1<<blockSize)-1;
                            exclusions.push_back(mask & (mask<<(sortedIndex-blockSize*blockIndex)));
                        }
395
396
397
398
399
400
401
                    }
                }
            }
        }
    }

private:
402
    int blockSize;
403
404
405
406
407
    float voxelSizeY, voxelSizeZ;
    float miny, maxy, minz, maxz;
    int ny, nz;
    float periodicBoxSize[3], recipBoxSize[3];
    bool triclinic;
408
    float periodicBoxVectors[3][3];
409
    const bool usePeriodic;
peastman's avatar
peastman committed
410
    vector<vector<vector<pair<float, int> > > > bins;
411
412
};

413
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
414
}
415

416
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
peastman's avatar
peastman committed
417
            const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
418
    dense = false;
419
    int numBlocks = (numAtoms+blockSize-1)/blockSize;
420
421
422
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
423
    sortedPositions.resize(4*numAtoms);
424
    
425
    // Record the parameters for the threads.
426
    
427
428
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
429
430
431
    this->periodicBoxVectors[0] = periodicBoxVectors[0];
    this->periodicBoxVectors[1] = periodicBoxVectors[1];
    this->periodicBoxVectors[2] = periodicBoxVectors[2];
432
433
434
435
436
437
438
439
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
440
    for (int i = 0; i < numAtoms; i++) {
441
442
443
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
444
    }
445
446
447
448
449
450
451
452
453
454
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
peastman's avatar
peastman committed
455
    threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
456
    threads.waitForThreads();
457
458
    sort(atomBins.begin(), atomBins.end());

459
    // Build the voxel hash.
460

461
    float edgeSizeY, edgeSizeZ;
462
    if (!usePeriodic)
463
        edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed
464
    else {
465
466
        edgeSizeY = 0.6f*periodicBoxVectors[1][1]/floorf(periodicBoxVectors[1][1]/maxDistance);
        edgeSizeZ = 0.6f*periodicBoxVectors[2][2]/floorf(periodicBoxVectors[2][2]/maxDistance);
467
    }
468
    Voxels voxels(blockSize, edgeSizeY, edgeSizeZ, miny, maxy, minz, maxz, periodicBoxVectors, usePeriodic);
469
470
471
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
472
473
        fvec4 atomPos(&atomLocations[4*atomIndex]);
        atomPos.store(&sortedPositions[4*i]);
474
        voxels.insert(i, &atomLocations[4*atomIndex]);
475
    }
476
477
    voxels.sortItems();
    this->voxels = &voxels;
478

479
480
    // Signal the threads to start running and wait for them to finish.
    
peastman's avatar
peastman committed
481
    atomicCounter = 0;
482
483
    threads.resumeThreads();
    threads.waitForThreads();
484
    
485
    // Add padding atoms to fill up the last block.
486
    
487
    int numPadding = numBlocks*blockSize-numAtoms;
488
    if (numPadding > 0) {
489
        const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
490
491
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
492
        auto& exc = blockExclusions[blockExclusions.size()-1];
493
494
495
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
496
497
}

498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
void CpuNeighborList::createDenseNeighborList(int numAtoms, const vector<set<int> >& exclusions) {
    dense = true;
    this->numAtoms = numAtoms;
    int numBlocks = (numAtoms+blockSize-1)/blockSize;
    blockExclusionIndices.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    for (int i = 0; i < numAtoms; i++)
        sortedAtoms[i] = i;
    for (int i = 0; i < numBlocks; i++) {
        // Build the list of exclusions for this block.

        int firstIndex = blockSize*i;
        int atomsInBlock = min(blockSize, numAtoms-firstIndex);
        map<int, int> exclusionMap;
        for (int j = 0; j < atomsInBlock; j++) {
            exclusionMap[firstIndex+j] = (1<<(j+1))-1;
        }
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = exclusions[firstIndex+j];
            const BlockExclusionMask mask = 1<<j;
            for (int exclusion : atomExclusions) {
                if (firstIndex <= exclusion) {
                    auto thisAtomFlags = exclusionMap.find(exclusion);
                    if (thisAtomFlags == exclusionMap.end())
                        exclusionMap[exclusion] = mask;
                    else
                        thisAtomFlags->second |= mask;
                }
            }
        }
        blockExclusionIndices[i].clear();
        blockExclusions[i].clear();
        for (auto flags : exclusionMap) {
            blockExclusionIndices[i].push_back(flags.first);
            blockExclusions[i].push_back(flags.second);
        }
    }

    // Add padding atoms to fill up the last block.

    int numPadding = numBlocks*blockSize-numAtoms;
    if (numPadding > 0) {
        const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        auto& exc = blockExclusions.back();
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
}

550
int CpuNeighborList::getNumBlocks() const {
551
    return sortedAtoms.size()/blockSize;
552
553
}

554
555
556
557
int CpuNeighborList::getBlockSize() const {
    return blockSize;
}

Daniel Towner's avatar
Daniel Towner committed
558
const std::vector<int32_t>& CpuNeighborList::getSortedAtoms() const {
559
560
561
562
563
564
565
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

566
const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
567
568
569
570
    return blockExclusions[blockIndex];
    
}

571
572
573
574
575
576
577
CpuNeighborList::NeighborIterator CpuNeighborList::getNeighborIterator(int blockIndex) const {
    if (dense)
        return NeighborIterator(blockIndex*blockSize, numAtoms, blockExclusionIndices[blockIndex], blockExclusions[blockIndex]);
    else
        return NeighborIterator(blockNeighbors[blockIndex], blockExclusions[blockIndex]);
}

578
579
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
580

581
582
583
584
585
586
587
588
589
590
591
592
593
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
594

595
596
597
598
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
599
    vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
600
    vector<VoxelIndex> atomVoxelIndex;
601
    while (true) {
peastman's avatar
peastman committed
602
        int i = atomicCounter++;
603
604
605
        if (i >= numBlocks)
            break;

606
607
        // Find the atoms in this block and compute their bounding box.
        
608
609
        int firstIndex = blockSize*i;
        int atomsInBlock = min(blockSize, numAtoms-firstIndex);
610
        blockAtoms.resize(atomsInBlock);
611
612
        atomVoxelIndex.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++) {
613
            blockAtoms[j] = sortedAtoms[firstIndex+j];
614
615
            atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
        }
616
        fvec4 minPos(&sortedPositions[4*firstIndex]);
617
618
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
619
            fvec4 pos(&sortedPositions[4*(firstIndex+j)]);
620
621
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
622
        }
623
624
625
626
627
628
629
630
631
632
633
        for (int j = 0; j < atomsInBlock; j++) {
            blockAtomX[j] = sortedPositions[4*(firstIndex+j)];
            blockAtomY[j] = sortedPositions[4*(firstIndex+j)+1];
            blockAtomZ[j] = sortedPositions[4*(firstIndex+j)+2];
        }
        for (int j = atomsInBlock; j < blockSize; j++) {
            blockAtomX[j] = 1e10;
            blockAtomY[j] = 1e10;
            blockAtomZ[j] = 1e10;
        }
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, blockAtomX, blockAtomY, blockAtomZ, sortedPositions, atomVoxelIndex);
634

635
        // Record the exclusions for this block.
636

637
        map<int, BlockExclusionMask> atomFlags;
638
639
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
640
            const BlockExclusionMask mask = 1<<j;
peastman's avatar
peastman committed
641
            for (int exclusion : atomExclusions) {
642
                const auto thisAtomFlags = atomFlags.find(exclusion);
643
                if (thisAtomFlags == atomFlags.end())
peastman's avatar
peastman committed
644
                    atomFlags[exclusion] = mask;
645
646
                else
                    thisAtomFlags->second |= mask;
647
648
            }
        }
649
650
651
        int numNeighbors = blockNeighbors[i].size();
        for (int k = 0; k < numNeighbors; k++) {
            int atomIndex = blockNeighbors[i][k];
652
            auto thisAtomFlags = atomFlags.find(atomIndex);
653
654
655
            if (thisAtomFlags != atomFlags.end())
                blockExclusions[i][k] |= thisAtomFlags->second;
        }
656
657
658
    }
}

659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
CpuNeighborList::NeighborIterator::NeighborIterator(const vector<int>& neighbors, const vector<BlockExclusionMask>& exclusions) :
        dense(false), neighbors(&neighbors), exclusions(&exclusions), currentIndex(-1) {
}

CpuNeighborList::NeighborIterator::NeighborIterator(int firstAtom, int lastAtom, const vector<int>& exclusionIndices, const vector<BlockExclusionMask>& exclusions) :
        dense(true), currentAtom(firstAtom-1), lastAtom(lastAtom), exclusionIndices(&exclusionIndices), exclusions(&exclusions), currentIndex(0) {
}

bool CpuNeighborList::NeighborIterator::next() {
    if (dense) {
        if (++currentAtom >= lastAtom)
            return false;
        if (currentIndex < exclusionIndices->size() && (*exclusionIndices)[currentIndex] == currentAtom)
            currentExclusions = (*exclusions)[currentIndex++];
        else
            currentExclusions = 0;
        return true;
    }
    else {
        if (++currentIndex < neighbors->size()) {
            currentAtom = (*neighbors)[currentIndex];
            currentExclusions = (*exclusions)[currentIndex];
            return true;
        }
        return false;
    }
}

int CpuNeighborList::NeighborIterator::getNeighbor() const {
    return currentAtom;
}

CpuNeighborList::BlockExclusionMask CpuNeighborList::NeighborIterator::getExclusions() const {
    return currentExclusions;
}

695
} // namespace OpenMM