CpuNeighborList.cpp 28.1 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
40
41
42
43
44
45
46
47
#include <set>
#include <map>
#include <cmath>

using namespace std;

namespace OpenMM {

class VoxelIndex 
{
public:
48
    VoxelIndex() : y(0), z(0) {
49
    }
50
    VoxelIndex(int y, int z) : y(y), z(z) {
51
    }
52
    int y;
53
    int z;
54
55
};

56
57
58
59
60
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
61
public:
62
63
64
65
66
67
68
69
70
71
72
73
74
75
    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                periodicBoxVectors[i][j] = (float) boxVectors[i][j];
        periodicBoxSize[0] = (float) boxVectors[0][0];
        periodicBoxSize[1] = (float) boxVectors[1][1];
        periodicBoxSize[2] = (float) boxVectors[2][2];
        recipBoxSize[0] = (float) (1/boxVectors[0][0]);
        recipBoxSize[1] = (float) (1/boxVectors[1][1]);
        recipBoxSize[2] = (float) (1/boxVectors[2][2]);
        triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
                     boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
                     boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
76
        if (usePeriodic) {
77
78
79
80
            ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
            nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
            voxelSizeY = boxVectors[1][1]/ny;
            voxelSizeZ = boxVectors[2][2]/nz;
81
82
83
        }
        else {
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
84
            nz = max(1, (int) floorf((maxz-minz)/voxelSizeZ+0.5f));
85
86
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
87
88
            if (maxz > minz)
                voxelSizeZ = (maxz-minz)/nz;
89
        }
90
91
92
93
        bins.resize(ny);
        for (int i = 0; i < ny; i++) {
            bins[i].resize(nz);
            for (int j = 0; j < nz; j++)
94
                bins[i][j].resize(0);
95
96
97
        }
    }

98
99
100
101
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
102
        VoxelIndex voxelIndex = getVoxelIndex(location);
103
        bins[voxelIndex.y][voxelIndex.z].push_back(make_pair(location[0], atom));
104
105
106
    }
    
    /**
107
     * Sort the particles in each voxel by x coordinate.
108
109
     */
    void sortItems() {
110
111
        for (int i = 0; i < ny; i++)
            for (int j = 0; j < nz; j++)
112
                sort(bins[i][j].begin(), bins[i][j].end());
113
    }
114
    
115
    /**
116
     * Find the index of the first particle in voxel (y,z) whose x coordinate is >= the specified value.
117
     */
118
    int findLowerBound(int y, int z, double x, int lower, int upper) const {
119
        const vector<pair<float, int> >& bin = bins[y][z];
120
121
        while (lower < upper) {
            int middle = (lower+upper)/2;
122
            if (bin[middle].first < x)
123
124
125
126
127
128
129
130
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
131
     * Find the index of the first particle in voxel (y,z) whose x coordinate is greater than the specified value.
132
     */
133
    int findUpperBound(int y, int z, double x, int lower, int upper) const {
134
        const vector<pair<float, int> >& bin = bins[y][z];
135
136
        while (lower < upper) {
            int middle = (lower+upper)/2;
137
            if (bin[middle].first > x)
138
139
140
141
142
143
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
144

145
146
147
    /**
     * Get the voxel index containing a particular location.
     */
148
    VoxelIndex getVoxelIndex(const float* location) const {
149
        float yperiodic, zperiodic;
150
        if (!usePeriodic) {
151
            yperiodic = location[1]-miny;
152
            zperiodic = location[2]-minz;
153
154
        }
        else {
155
156
157
158
159
            float scale2 = floorf(location[2]*recipBoxSize[2]);
            yperiodic = location[1]-periodicBoxVectors[2][1]*scale2;
            zperiodic = location[2]-periodicBoxVectors[2][2]*scale2;
            float scale1 = floorf(yperiodic*recipBoxSize[1]);
            yperiodic -= periodicBoxVectors[1][0]*scale1;
160
        }
161
162
        int y = max(0, min(ny-1, int(floorf(yperiodic / voxelSizeY))));
        int z = max(0, min(nz-1, int(floorf(zperiodic / voxelSizeZ))));
163
        
164
        return VoxelIndex(y, z);
165
    }
166
167
        
    void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
168
169
        neighbors.resize(0);
        exclusions.resize(0);
170
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
171
172
173
174
175
176
        fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
        fvec4 periodicBoxVec4[3];
        periodicBoxVec4[0] = fvec4(periodicBoxVectors[0][0], periodicBoxVectors[0][1], periodicBoxVectors[0][2], 0);
        periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
        periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);

177
        float maxDistanceSquared = maxDistance * maxDistance;
178
179
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
180

181
182
        int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY)+1; // How may voxels away do we have to look?
        int dIndexZ = int((maxDistance+blockWidth[2])/voxelSizeZ)+1;
peastman's avatar
peastman committed
183
184
        if (usePeriodic) {
            dIndexY = min(ny/2, dIndexY);
185
            dIndexZ = min(nz/2, dIndexZ);
peastman's avatar
peastman committed
186
        }
187
188
189
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
190
191
192
193
194
195
196

        // Loop over voxels along the z axis.

        int startz = centerVoxelIndex.z-dIndexZ;
        int endz = centerVoxelIndex.z+dIndexZ;
        if (usePeriodic)
            endz = min(endz, startz+nz-1);
197
        else {
198
199
            startz = max(startz, 0);
            endz = min(endz, nz-1);
200
        }
201
        int lastSortedIndex = blockSize*(blockIndex+1);
202
        VoxelIndex voxelIndex(0, 0);
203
204
        for (int z = startz; z <= endz; ++z) {
            voxelIndex.z = z;
205
            if (usePeriodic)
206
207
208
209
                voxelIndex.z = (z < 0 ? z+nz : (z >= nz ? z-nz : z));

            // Loop over voxels along the y axis.

210
            float boxz = floor((float) z/nz);
211
212
213
214
215
216
217
218
219
220
221
222
            int starty = centerVoxelIndex.y-dIndexY;
            int endy = centerVoxelIndex.y+dIndexY;
            float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0);
            if (usePeriodic) {
                starty -= (int) ceil(yoffset/voxelSizeY);
                endy -= (int) floor(yoffset/voxelSizeY);
                endy = min(endy, starty+ny-1);
            }
            else {
                starty = max(starty, 0);
                endy = min(endy, ny-1);
            }
223
            for (int y = starty; y <= endy; ++y) {
224
225
226
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
227
                float boxy = floor((float) y/ny);
228
229
230
231
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
232
233
                float minx = centerPos[0];
                float maxx = centerPos[0];
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
                if (usePeriodic && triclinic) {
                    for (int k = 0; k < (int) blockAtoms.size(); k++) {
                        const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
                        fvec4 delta1(0, voxelSizeY*voxelIndex.y-atomPos[1], voxelSizeZ*voxelIndex.z-atomPos[2], 0);
                        fvec4 delta2 = delta1+fvec4(0, voxelSizeY, 0, 0);
                        fvec4 delta3 = delta1+fvec4(0, 0, voxelSizeZ, 0);
                        fvec4 delta4 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
                        delta1 -= periodicBoxVec4[2]*floorf(delta1[2]*recipBoxSize[2]+0.5f);
                        delta1 -= periodicBoxVec4[1]*floorf(delta1[1]*recipBoxSize[1]+0.5f);
                        delta1 -= periodicBoxVec4[0]*floorf(delta1[0]*recipBoxSize[0]+0.5f);
                        delta2 -= periodicBoxVec4[2]*floorf(delta2[2]*recipBoxSize[2]+0.5f);
                        delta2 -= periodicBoxVec4[1]*floorf(delta2[1]*recipBoxSize[1]+0.5f);
                        delta2 -= periodicBoxVec4[0]*floorf(delta2[0]*recipBoxSize[0]+0.5f);
                        delta3 -= periodicBoxVec4[2]*floorf(delta3[2]*recipBoxSize[2]+0.5f);
                        delta3 -= periodicBoxVec4[1]*floorf(delta3[1]*recipBoxSize[1]+0.5f);
                        delta3 -= periodicBoxVec4[0]*floorf(delta3[0]*recipBoxSize[0]+0.5f);
                        delta4 -= periodicBoxVec4[2]*floorf(delta4[2]*recipBoxSize[2]+0.5f);
                        delta4 -= periodicBoxVec4[1]*floorf(delta4[1]*recipBoxSize[1]+0.5f);
                        delta4 -= periodicBoxVec4[0]*floorf(delta4[0]*recipBoxSize[0]+0.5f);
                        if (delta1[1] < 0 && delta1[1]+voxelSizeY > 0)
                            delta1 = fvec4(delta1[0], 0, delta1[2], 0);
                        if (delta1[2] < 0 && delta1[2]+voxelSizeZ > 0)
                            delta1 = fvec4(delta1[0], delta1[1], 0, 0);
                        if (delta3[1] < 0 && delta3[1]+voxelSizeY > 0)
                            delta3 = fvec4(delta3[0], 0, delta3[2], 0);
                        if (delta2[2] < 0 && delta2[2]+voxelSizeZ > 0)
                            delta2 = fvec4(delta2[0], delta2[1], 0, 0);
                        fvec4 delta = min(min(min(abs(delta1), abs(delta2)), abs(delta3)), abs(delta4));
                        float dy = (voxelIndex.y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                        float dz = (voxelIndex.z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
                        float dist2 = maxDistanceSquared-dy*dy-dz*dz;
                        if (dist2 > 0) {
                            float dist = sqrtf(dist2);
                            minx = min(minx, atomPos[0]-dist-max(max(max(delta1[0], delta2[0]), delta3[0]), delta4[0]));
                            maxx = max(maxx, atomPos[0]+dist-min(min(min(delta1[0], delta2[0]), delta3[0]), delta4[0]));
                        }
270
                    }
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
                }
                else {
                    float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
                    fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
                    for (int k = 0; k < (int) blockAtoms.size(); k++) {
                        const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
                        fvec4 posVec(atomPos);
                        fvec4 delta1 = offset-posVec;
                        fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
                        if (usePeriodic) {
                            delta1 -= round(delta1*invBoxSize)*boxSize;
                            delta2 -= round(delta2*invBoxSize)*boxSize;
                        }
                        fvec4 delta = min(abs(delta1), abs(delta2));
                        float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                        float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
                        float dist2 = maxDistanceSquared-dy*dy-dz*dz;
                        if (dist2 > 0) {
                            float dist = sqrtf(dist2);
                            minx = min(minx, atomPos[0]-dist-xoffset);
                            maxx = max(maxx, atomPos[0]+dist-xoffset);
                        }
293
294
                    }
                }
295
                if (minx == maxx)
peastman's avatar
peastman committed
296
                    continue;
297
298
299
                bool needPeriodic = usePeriodic && (centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
                                                    centerPos[2]-blockWidth[2] < maxDistance || centerPos[2]+blockWidth[2] > periodicBoxSize[2]-maxDistance ||
                                                    minx < 0.0f || maxx > periodicBoxVectors[0][0]);
300
                int numRanges;
301
302
                int rangeStart[2];
                int rangeEnd[2];
303
304
                int binSize = bins[voxelIndex.y][voxelIndex.z].size();
                rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx, 0, binSize);
305
306
                if (needPeriodic) {
                    numRanges = 2;
307
308
309
310
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
                    if (rangeStart[0] > 0 && rangeEnd[0] < binSize)
                        numRanges = 1;
                    else if (rangeStart[0] > 0) {
311
                        rangeStart[1] = 0;
312
                        rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0], 0, rangeStart[0]), rangeStart[0]);
313
314
                    }
                    else {
315
                        rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0], rangeEnd[0], binSize), rangeEnd[0]);
316
                        rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size();
317
318
                    }
                }
319
320
                else {
                    numRanges = 1;
321
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
322
                }
323
                bool periodicRectangular = (needPeriodic && !triclinic);
324
325
326
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
327
                const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
328
329
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
330
                        const int sortedIndex = voxelBins[item].second;
331

peastman's avatar
peastman committed
332
                        // Avoid duplicate entries.
333
                        if (sortedIndex >= lastSortedIndex)
334
                            continue;
335
                        
336
                        fvec4 atomPos(&sortedPositions[4*sortedIndex]);
337
                        fvec4 delta = atomPos-blockCenter;
338
339
                        if (periodicRectangular)
                            delta -= round(delta*invBoxSize)*boxSize;
340
341
342
343
344
                        else if (needPeriodic) {
                            delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
                            delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
                            delta -= periodicBoxVec4[0]*floorf(delta[0]*recipBoxSize[0]+0.5f);
                        }
345
                        delta = max(0.0f, abs(delta)-blockWidth);
346
                        float dSquared = dot3(delta, delta);
347
348
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
349
                        
350
351
352
353
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
354
355
356
357
358
                            bool anyInteraction = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
                                fvec4 dx = fvec4(&blockAtomX[k])-atomPos[0];
                                fvec4 dy = fvec4(&blockAtomY[k])-atomPos[1];
                                fvec4 dz = fvec4(&blockAtomZ[k])-atomPos[2];
359
                                if (periodicRectangular) {
360
361
362
                                    dx -= round(dx*invBoxSize[0])*boxSize[0];
                                    dy -= round(dy*invBoxSize[1])*boxSize[1];
                                    dz -= round(dz*invBoxSize[2])*boxSize[2];
363
                                }
364
                                else if (needPeriodic) {
365
366
367
368
369
370
371
372
373
                                    fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
                                    dx -= scale3*periodicBoxVectors[2][0];
                                    dy -= scale3*periodicBoxVectors[2][1];
                                    dz -= scale3*periodicBoxVectors[2][2];
                                    fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
                                    dx -= scale2*periodicBoxVectors[1][0];
                                    dy -= scale2*periodicBoxVectors[1][1];
                                    fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
                                    dx -= scale1*periodicBoxVectors[0][0];
374
                                }
375
376
377
                                fvec4 r2 = dx*dx + dy*dy + dz*dz;
                                if (any(r2 < maxDistanceSquared)) {
                                    anyInteraction = true;
378
379
380
                                    break;
                                }
                            }
381
                            if (!anyInteraction)
382
383
384
385
386
387
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
388
                        if (sortedIndex < blockSize*blockIndex)
389
                            exclusions.push_back(0);
390
391
392
393
                        else {
                            int mask = (1<<blockSize)-1;
                            exclusions.push_back(mask & (mask<<(sortedIndex-blockSize*blockIndex)));
                        }
394
395
396
397
398
399
400
                    }
                }
            }
        }
    }

private:
401
    int blockSize;
402
403
404
405
406
    float voxelSizeY, voxelSizeZ;
    float miny, maxy, minz, maxz;
    int ny, nz;
    float periodicBoxSize[3], recipBoxSize[3];
    bool triclinic;
407
    float periodicBoxVectors[3][3];
408
    const bool usePeriodic;
peastman's avatar
peastman committed
409
    vector<vector<vector<pair<float, int> > > > bins;
410
411
};

412
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
413
public:
414
415
416
417
    ThreadTask(CpuNeighborList& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeNeighborList(threads, threadIndex);
418
419
420
    }
    CpuNeighborList& owner;
};
421

422
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
423
}
424

425
426
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
            const RealVec* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
427
    int numBlocks = (numAtoms+blockSize-1)/blockSize;
428
429
430
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
431
    sortedPositions.resize(4*numAtoms);
432
    
433
    // Record the parameters for the threads.
434
    
435
436
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
437
438
439
    this->periodicBoxVectors[0] = periodicBoxVectors[0];
    this->periodicBoxVectors[1] = periodicBoxVectors[1];
    this->periodicBoxVectors[2] = periodicBoxVectors[2];
440
441
442
443
444
445
446
447
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
448
    for (int i = 0; i < numAtoms; i++) {
449
450
451
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
452
    }
453
454
455
456
457
458
459
460
461
462
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
463
464
465
    ThreadTask task(*this);
    threads.execute(task);
    threads.waitForThreads();
466
467
    sort(atomBins.begin(), atomBins.end());

468
    // Build the voxel hash.
469

470
    float edgeSizeY, edgeSizeZ;
471
    if (!usePeriodic)
472
        edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed
473
    else {
474
475
        edgeSizeY = 0.6f*periodicBoxVectors[1][1]/floorf(periodicBoxVectors[1][1]/maxDistance);
        edgeSizeZ = 0.6f*periodicBoxVectors[2][2]/floorf(periodicBoxVectors[2][2]/maxDistance);
476
    }
477
    Voxels voxels(blockSize, edgeSizeY, edgeSizeZ, miny, maxy, minz, maxz, periodicBoxVectors, usePeriodic);
478
479
480
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
481
482
        fvec4 atomPos(&atomLocations[4*atomIndex]);
        atomPos.store(&sortedPositions[4*i]);
483
        voxels.insert(i, &atomLocations[4*atomIndex]);
484
    }
485
486
    voxels.sortItems();
    this->voxels = &voxels;
487

488
489
    // Signal the threads to start running and wait for them to finish.
    
490
    gmx_atomic_set(&atomicCounter, 0);
491
492
    threads.resumeThreads();
    threads.waitForThreads();
493
    
494
    // Add padding atoms to fill up the last block.
495
    
496
    int numPadding = numBlocks*blockSize-numAtoms;
497
    if (numPadding > 0) {
498
        char mask = ((0xFFFF-(1<<blockSize)+1) >> numPadding);
499
500
501
502
503
504
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
505
506
}

507
int CpuNeighborList::getNumBlocks() const {
508
    return sortedAtoms.size()/blockSize;
509
510
}

511
512
513
514
int CpuNeighborList::getBlockSize() const {
    return blockSize;
}

515
516
517
518
519
520
521
522
523
524
525
526
527
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

528
529
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
530

531
532
533
534
535
536
537
538
539
540
541
542
543
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
544

545
546
547
548
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
549
    vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
550
    vector<VoxelIndex> atomVoxelIndex;
551
552
553
554
555
    while (true) {
        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
        if (i >= numBlocks)
            break;

556
557
        // Find the atoms in this block and compute their bounding box.
        
558
559
        int firstIndex = blockSize*i;
        int atomsInBlock = min(blockSize, numAtoms-firstIndex);
560
        blockAtoms.resize(atomsInBlock);
561
562
        atomVoxelIndex.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++) {
563
            blockAtoms[j] = sortedAtoms[firstIndex+j];
564
565
            atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
        }
566
        fvec4 minPos(&sortedPositions[4*firstIndex]);
567
568
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
569
            fvec4 pos(&sortedPositions[4*(firstIndex+j)]);
570
571
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
572
        }
573
574
575
576
577
578
579
580
581
582
583
        for (int j = 0; j < atomsInBlock; j++) {
            blockAtomX[j] = sortedPositions[4*(firstIndex+j)];
            blockAtomY[j] = sortedPositions[4*(firstIndex+j)+1];
            blockAtomZ[j] = sortedPositions[4*(firstIndex+j)+2];
        }
        for (int j = atomsInBlock; j < blockSize; j++) {
            blockAtomX[j] = 1e10;
            blockAtomY[j] = 1e10;
            blockAtomZ[j] = 1e10;
        }
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, blockAtomX, blockAtomY, blockAtomZ, sortedPositions, atomVoxelIndex);
584

585
        // Record the exclusions for this block.
586

587
        map<int, char> atomFlags;
588
589
590
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
591
592
593
594
595
596
            for (set<int>::const_iterator iter = atomExclusions.begin(); iter != atomExclusions.end(); ++iter) {
                map<int, char>::iterator thisAtomFlags = atomFlags.find(*iter);
                if (thisAtomFlags == atomFlags.end())
                    atomFlags[*iter] = mask;
                else
                    thisAtomFlags->second |= mask;
597
598
            }
        }
599
600
601
602
603
604
605
        int numNeighbors = blockNeighbors[i].size();
        for (int k = 0; k < numNeighbors; k++) {
            int atomIndex = blockNeighbors[i][k];
            map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
            if (thisAtomFlags != atomFlags.end())
                blockExclusions[i][k] |= thisAtomFlags->second;
        }
606
607
608
    }
}

609
} // namespace OpenMM