CpuNeighborList.cpp 24.9 KB
Newer Older
1
2
3
4
5
6
7
8
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
9
 * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
40
41
42
43
44
45
46
47
#include <set>
#include <map>
#include <cmath>

using namespace std;

namespace OpenMM {

class VoxelIndex 
{
public:
48
    VoxelIndex() : y(0), z(0) {
49
    }
50
    VoxelIndex(int y, int z) : y(y), z(z) {
51
    }
52
    int y;
53
    int z;
54
55
};

56
57
58
59
60
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
61
public:
62
63
64
65
66
67
68
69
70
71
72
73
74
75
    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                periodicBoxVectors[i][j] = (float) boxVectors[i][j];
        periodicBoxSize[0] = (float) boxVectors[0][0];
        periodicBoxSize[1] = (float) boxVectors[1][1];
        periodicBoxSize[2] = (float) boxVectors[2][2];
        recipBoxSize[0] = (float) (1/boxVectors[0][0]);
        recipBoxSize[1] = (float) (1/boxVectors[1][1]);
        recipBoxSize[2] = (float) (1/boxVectors[2][2]);
        triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
                     boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
                     boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
76
        if (usePeriodic) {
77
78
79
80
            ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
            nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
            voxelSizeY = boxVectors[1][1]/ny;
            voxelSizeZ = boxVectors[2][2]/nz;
81
82
83
        }
        else {
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
84
            nz = max(1, (int) floorf((maxz-minz)/voxelSizeZ+0.5f));
85
86
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
87
88
            if (maxz > minz)
                voxelSizeZ = (maxz-minz)/nz;
89
        }
90
91
92
93
        bins.resize(ny);
        for (int i = 0; i < ny; i++) {
            bins[i].resize(nz);
            for (int j = 0; j < nz; j++)
94
                bins[i][j].resize(0);
95
96
97
        }
    }

98
99
100
101
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
102
        VoxelIndex voxelIndex = getVoxelIndex(location);
103
        bins[voxelIndex.y][voxelIndex.z].push_back(make_pair(location[0], atom));
104
105
106
    }
    
    /**
107
     * Sort the particles in each voxel by x coordinate.
108
109
     */
    void sortItems() {
110
111
        for (int i = 0; i < ny; i++)
            for (int j = 0; j < nz; j++)
112
                sort(bins[i][j].begin(), bins[i][j].end());
113
    }
114
    
115
    /**
116
     * Find the index of the first particle in voxel (y,z) whose x coordinate is >= the specified value.
117
     */
118
    int findLowerBound(int y, int z, double x, int lower, int upper) const {
119
        const vector<pair<float, int> >& bin = bins[y][z];
120
121
        while (lower < upper) {
            int middle = (lower+upper)/2;
122
            if (bin[middle].first < x)
123
124
125
126
127
128
129
130
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
131
     * Find the index of the first particle in voxel (y,z) whose x coordinate is greater than the specified value.
132
     */
133
    int findUpperBound(int y, int z, double x, int lower, int upper) const {
134
        const vector<pair<float, int> >& bin = bins[y][z];
135
136
        while (lower < upper) {
            int middle = (lower+upper)/2;
137
            if (bin[middle].first > x)
138
139
140
141
142
143
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
144

145
146
147
    /**
     * Get the voxel index containing a particular location.
     */
148
    VoxelIndex getVoxelIndex(const float* location) const {
149
        float yperiodic, zperiodic;
150
        if (!usePeriodic) {
151
            yperiodic = location[1]-miny;
152
            zperiodic = location[2]-minz;
153
154
        }
        else {
155
156
157
158
159
            float scale2 = floorf(location[2]*recipBoxSize[2]);
            yperiodic = location[1]-periodicBoxVectors[2][1]*scale2;
            zperiodic = location[2]-periodicBoxVectors[2][2]*scale2;
            float scale1 = floorf(yperiodic*recipBoxSize[1]);
            yperiodic -= periodicBoxVectors[1][0]*scale1;
160
        }
161
162
        int y = max(0, min(ny-1, int(floorf(yperiodic / voxelSizeY))));
        int z = max(0, min(nz-1, int(floorf(zperiodic / voxelSizeZ))));
163
        
164
        return VoxelIndex(y, z);
165
    }
166
167
        
    void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
168
169
        neighbors.resize(0);
        exclusions.resize(0);
170
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
171
172
173
174
175
176
        fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
        fvec4 periodicBoxVec4[3];
        periodicBoxVec4[0] = fvec4(periodicBoxVectors[0][0], periodicBoxVectors[0][1], periodicBoxVectors[0][2], 0);
        periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
        periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);

177
        float maxDistanceSquared = maxDistance * maxDistance;
178
179
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
180

181
182
        int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY)+1; // How may voxels away do we have to look?
        int dIndexZ = int((maxDistance+blockWidth[2])/voxelSizeZ)+1;
peastman's avatar
peastman committed
183
184
        if (usePeriodic) {
            dIndexY = min(ny/2, dIndexY);
185
            dIndexZ = min(nz/2, dIndexZ);
peastman's avatar
peastman committed
186
        }
187
188
189
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
190
191
192
193
194
195
196

        // Loop over voxels along the z axis.

        int startz = centerVoxelIndex.z-dIndexZ;
        int endz = centerVoxelIndex.z+dIndexZ;
        if (usePeriodic)
            endz = min(endz, startz+nz-1);
197
        else {
198
199
            startz = max(startz, 0);
            endz = min(endz, nz-1);
200
        }
201
        int lastSortedIndex = blockSize*(blockIndex+1);
202
        VoxelIndex voxelIndex(0, 0);
203
204
        for (int z = startz; z <= endz; ++z) {
            voxelIndex.z = z;
205
            if (usePeriodic)
206
207
208
209
                voxelIndex.z = (z < 0 ? z+nz : (z >= nz ? z-nz : z));

            // Loop over voxels along the y axis.

210
            float boxz = floor((float) z/nz);
211
212
213
214
215
216
217
218
219
220
221
222
            int starty = centerVoxelIndex.y-dIndexY;
            int endy = centerVoxelIndex.y+dIndexY;
            float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0);
            if (usePeriodic) {
                starty -= (int) ceil(yoffset/voxelSizeY);
                endy -= (int) floor(yoffset/voxelSizeY);
                endy = min(endy, starty+ny-1);
            }
            else {
                starty = max(starty, 0);
                endy = min(endy, ny-1);
            }
223
            for (int y = starty; y <= endy; ++y) {
224
225
226
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
227
                float boxy = floor((float) y/ny);
228
                float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
229
230
231
232
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
233
234
                float minx = centerPos[0];
                float maxx = centerPos[0];
peastman's avatar
peastman committed
235
236
237
238
239
240
241
242
243
                fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
                for (int k = 0; k < (int) blockAtoms.size(); k++) {
                    const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
                    fvec4 posVec(atomPos);
                    fvec4 delta1 = offset-posVec;
                    fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
                    if (usePeriodic) {
                        delta1 -= round(delta1*invBoxSize)*boxSize;
                        delta2 -= round(delta2*invBoxSize)*boxSize;
244
                    }
peastman's avatar
peastman committed
245
246
247
248
249
250
251
252
                    fvec4 delta = min(abs(delta1), abs(delta2));
                    float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                    float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
                    float dist2 = maxDistanceSquared-dy*dy-dz*dz;
                    if (dist2 > 0) {
                        float dist = sqrtf(dist2);
                        minx = min(minx, atomPos[0]-dist-xoffset);
                        maxx = max(maxx, atomPos[0]+dist-xoffset);
253
254
                    }
                }
255
                if (minx == maxx)
peastman's avatar
peastman committed
256
                    continue;
257
258
259
260
                bool needPeriodic = (centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
                                     centerPos[2]-blockWidth[2] < maxDistance || centerPos[2]+blockWidth[2] > periodicBoxSize[2]-maxDistance ||
                                     minx < 0.0f || maxx > periodicBoxVectors[0][0]);
                int numRanges;
261
262
                int rangeStart[2];
                int rangeEnd[2];
263
264
                int binSize = bins[voxelIndex.y][voxelIndex.z].size();
                rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx, 0, binSize);
265
266
                if (needPeriodic) {
                    numRanges = 2;
267
268
269
270
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
                    if (rangeStart[0] > 0 && rangeEnd[0] < binSize)
                        numRanges = 1;
                    else if (rangeStart[0] > 0) {
271
                        rangeStart[1] = 0;
272
                        rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0], 0, rangeStart[0]), rangeStart[0]);
273
274
                    }
                    else {
275
                        rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0], rangeEnd[0], binSize), rangeEnd[0]);
276
                        rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size();
277
278
                    }
                }
279
280
                else {
                    numRanges = 1;
281
                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
282
                }
283
                bool periodicRectangular = (needPeriodic && !triclinic);
284
285
286
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
287
                const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
288
289
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
290
                        const int sortedIndex = voxelBins[item].second;
291

peastman's avatar
peastman committed
292
                        // Avoid duplicate entries.
293
                        if (sortedIndex >= lastSortedIndex)
294
                            continue;
295
                        
296
                        fvec4 atomPos(&sortedPositions[4*sortedIndex]);
297
                        fvec4 delta = atomPos-centerPos;
298
299
                        if (periodicRectangular)
                            delta -= round(delta*invBoxSize)*boxSize;
300
301
302
303
304
                        else if (needPeriodic) {
                            delta -= periodicBoxVec4[2]*floorf(delta[2]*recipBoxSize[2]+0.5f);
                            delta -= periodicBoxVec4[1]*floorf(delta[1]*recipBoxSize[1]+0.5f);
                            delta -= periodicBoxVec4[0]*floorf(delta[0]*recipBoxSize[0]+0.5f);
                        }
305
                        delta = max(0.0f, abs(delta)-blockWidth);
306
                        float dSquared = dot3(delta, delta);
307
308
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
309
                        
310
311
312
313
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
314
315
316
317
318
                            bool anyInteraction = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k += 4) {
                                fvec4 dx = fvec4(&blockAtomX[k])-atomPos[0];
                                fvec4 dy = fvec4(&blockAtomY[k])-atomPos[1];
                                fvec4 dz = fvec4(&blockAtomZ[k])-atomPos[2];
319
                                if (periodicRectangular) {
320
321
322
                                    dx -= round(dx*invBoxSize[0])*boxSize[0];
                                    dy -= round(dy*invBoxSize[1])*boxSize[1];
                                    dz -= round(dz*invBoxSize[2])*boxSize[2];
323
                                }
324
                                else if (needPeriodic) {
325
326
327
328
329
330
331
332
333
                                    fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
                                    dx -= scale3*periodicBoxVectors[2][0];
                                    dy -= scale3*periodicBoxVectors[2][1];
                                    dz -= scale3*periodicBoxVectors[2][2];
                                    fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
                                    dx -= scale2*periodicBoxVectors[1][0];
                                    dy -= scale2*periodicBoxVectors[1][1];
                                    fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
                                    dx -= scale1*periodicBoxVectors[0][0];
334
                                }
335
336
337
                                fvec4 r2 = dx*dx + dy*dy + dz*dz;
                                if (any(r2 < maxDistanceSquared)) {
                                    anyInteraction = true;
338
339
340
                                    break;
                                }
                            }
341
                            if (!anyInteraction)
342
343
344
345
346
347
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
348
                        if (sortedIndex < blockSize*blockIndex)
349
                            exclusions.push_back(0);
350
351
352
353
                        else {
                            int mask = (1<<blockSize)-1;
                            exclusions.push_back(mask & (mask<<(sortedIndex-blockSize*blockIndex)));
                        }
354
355
356
357
358
359
360
                    }
                }
            }
        }
    }

private:
361
    int blockSize;
362
363
364
365
366
    float voxelSizeY, voxelSizeZ;
    float miny, maxy, minz, maxz;
    int ny, nz;
    float periodicBoxSize[3], recipBoxSize[3];
    bool triclinic;
367
    float periodicBoxVectors[3][3];
368
    const bool usePeriodic;
peastman's avatar
peastman committed
369
    vector<vector<vector<pair<float, int> > > > bins;
370
371
};

372
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
373
public:
374
375
376
377
    ThreadTask(CpuNeighborList& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeNeighborList(threads, threadIndex);
378
379
380
    }
    CpuNeighborList& owner;
};
381

382
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
383
}
384

385
386
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
            const RealVec* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
387
    int numBlocks = (numAtoms+blockSize-1)/blockSize;
388
389
390
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
391
    sortedPositions.resize(4*numAtoms);
392
    
393
    // Record the parameters for the threads.
394
    
395
396
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
397
398
399
    this->periodicBoxVectors[0] = periodicBoxVectors[0];
    this->periodicBoxVectors[1] = periodicBoxVectors[1];
    this->periodicBoxVectors[2] = periodicBoxVectors[2];
400
401
402
403
404
405
406
407
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
408
    for (int i = 0; i < numAtoms; i++) {
409
410
411
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
412
    }
413
414
415
416
417
418
419
420
421
422
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
423
424
425
    ThreadTask task(*this);
    threads.execute(task);
    threads.waitForThreads();
426
427
    sort(atomBins.begin(), atomBins.end());

428
    // Build the voxel hash.
429

430
    float edgeSizeY, edgeSizeZ;
431
    if (!usePeriodic)
432
        edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed
433
    else {
434
435
        edgeSizeY = 0.6f*periodicBoxVectors[1][1]/floorf(periodicBoxVectors[1][1]/maxDistance);
        edgeSizeZ = 0.6f*periodicBoxVectors[2][2]/floorf(periodicBoxVectors[2][2]/maxDistance);
436
    }
437
    Voxels voxels(blockSize, edgeSizeY, edgeSizeZ, miny, maxy, minz, maxz, periodicBoxVectors, usePeriodic);
438
439
440
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
441
442
        fvec4 atomPos(&atomLocations[4*atomIndex]);
        atomPos.store(&sortedPositions[4*i]);
443
        voxels.insert(i, &atomLocations[4*atomIndex]);
444
    }
445
446
    voxels.sortItems();
    this->voxels = &voxels;
447

448
449
    // Signal the threads to start running and wait for them to finish.
    
450
    gmx_atomic_set(&atomicCounter, 0);
451
452
    threads.resumeThreads();
    threads.waitForThreads();
453
    
454
    // Add padding atoms to fill up the last block.
455
    
456
    int numPadding = numBlocks*blockSize-numAtoms;
457
    if (numPadding > 0) {
458
        char mask = ((0xFFFF-(1<<blockSize)+1) >> numPadding);
459
460
461
462
463
464
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
465
466
}

467
int CpuNeighborList::getNumBlocks() const {
468
    return sortedAtoms.size()/blockSize;
469
470
}

471
472
473
474
475
476
477
478
479
480
481
482
483
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

484
485
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
486

487
488
489
490
491
492
493
494
495
496
497
498
499
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
500

501
502
503
504
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
505
    vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
506
    vector<VoxelIndex> atomVoxelIndex;
507
508
509
510
511
    while (true) {
        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
        if (i >= numBlocks)
            break;

512
513
        // Find the atoms in this block and compute their bounding box.
        
514
515
        int firstIndex = blockSize*i;
        int atomsInBlock = min(blockSize, numAtoms-firstIndex);
516
        blockAtoms.resize(atomsInBlock);
517
518
        atomVoxelIndex.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++) {
519
            blockAtoms[j] = sortedAtoms[firstIndex+j];
520
521
            atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
        }
522
        fvec4 minPos(&sortedPositions[4*firstIndex]);
523
524
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
525
            fvec4 pos(&sortedPositions[4*(firstIndex+j)]);
526
527
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
528
        }
529
530
531
532
533
534
535
536
537
538
539
        for (int j = 0; j < atomsInBlock; j++) {
            blockAtomX[j] = sortedPositions[4*(firstIndex+j)];
            blockAtomY[j] = sortedPositions[4*(firstIndex+j)+1];
            blockAtomZ[j] = sortedPositions[4*(firstIndex+j)+2];
        }
        for (int j = atomsInBlock; j < blockSize; j++) {
            blockAtomX[j] = 1e10;
            blockAtomY[j] = 1e10;
            blockAtomZ[j] = 1e10;
        }
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, blockAtomX, blockAtomY, blockAtomZ, sortedPositions, atomVoxelIndex);
540

541
        // Record the exclusions for this block.
542

543
        map<int, char> atomFlags;
544
545
546
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
547
548
549
550
551
552
            for (set<int>::const_iterator iter = atomExclusions.begin(); iter != atomExclusions.end(); ++iter) {
                map<int, char>::iterator thisAtomFlags = atomFlags.find(*iter);
                if (thisAtomFlags == atomFlags.end())
                    atomFlags[*iter] = mask;
                else
                    thisAtomFlags->second |= mask;
553
554
            }
        }
555
556
557
558
559
560
561
        int numNeighbors = blockNeighbors[i].size();
        for (int k = 0; k < numNeighbors; k++) {
            int atomIndex = blockNeighbors[i][k];
            map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
            if (thisAtomFlags != atomFlags.end())
                blockExclusions[i][k] |= thisAtomFlags->second;
        }
562
563
564
    }
}

565
} // namespace OpenMM