CpuNeighborList.cpp 19.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2013 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
#include <set>
#include <map>
#include <cmath>
40
#include <smmintrin.h>
41
42
43
44
45

using namespace std;

namespace OpenMM {

46
47
const int CpuNeighborList::BlockSize = 4;

48
49
50
class VoxelIndex 
{
public:
51
52
    VoxelIndex() : x(0), y(0) {
    }
53
    VoxelIndex(int x, int y) : x(x), y(y) {
54
    }
55
56
57
58
    int x;
    int y;
};

59
60
61
62
63
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
64
public:
65
66
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
67
68
69
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
70
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
71
            voxelSizeY = periodicBoxSize[1]/ny;
72
73
74
75
76
77
78
79
80
81
82
83
84
85
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
86
87
88
        }
    }

89
90
91
92
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
93
        VoxelIndex voxelIndex = getVoxelIndex(location);
peastman's avatar
peastman committed
94
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], atom));
95
96
97
98
99
100
101
102
103
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
104
    }
105
    
106
107
108
109
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
peastman's avatar
peastman committed
110
        const vector<pair<float, int> >& bin = bins[x][y];
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
peastman's avatar
peastman committed
127
        const vector<pair<float, int> >& bin = bins[x][y];
128
129
130
131
132
133
134
135
136
137
138
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
139

140
141
142
    /**
     * Get the voxel index containing a particular location.
     */
143
    VoxelIndex getVoxelIndex(const float* location) const {
144
        float xperiodic, yperiodic;
145
        if (!usePeriodic) {
146
147
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
148
149
150
151
152
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
153
154
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
155
        
156
        return VoxelIndex(x, y);
157
158
    }

159
160
161
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
162
163
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
164
165
        
        float maxDistanceSquared = maxDistance * maxDistance;
166
167
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
168

peastman's avatar
peastman committed
169
170
171
172
173
174
        int dIndexX = int((maxDistance+blockWidth[0])/voxelSizeX)+1; // How may voxels away do we have to look?
        int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY)+1;
        if (usePeriodic) {
            dIndexX = min(nx/2, dIndexX);
            dIndexY = min(ny/2, dIndexY);
        }
175
176
177
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
178
179
180
        VoxelIndex atomVoxelIndex[BlockSize];
        for (int i = 0; i < (int) blockAtoms.size(); i++)
            atomVoxelIndex[i] = getVoxelIndex(&atomLocations[4*blockAtoms[i]]);
181
182
183
184
185
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
186
        if (usePeriodic) {
187
188
189
190
191
192
193
194
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
195
        }
196
        int lastSortedIndex = BlockSize*(blockIndex+1);
197
198
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
199
200
201
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
202
            for (int y = starty; y <= endy; ++y) {
203
204
205
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
206
207
208
209
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
                float minz = centerPos[2];
                float maxz = centerPos[2];
                fvec4 offset(voxelSizeX*x+(usePeriodic ? 0.0f : minx), voxelSizeY*y+(usePeriodic ? 0.0f : miny), 0, 0);
                for (int k = 0; k < (int) blockAtoms.size(); k++) {
                    const float* atomPos = &atomLocations[4*blockAtoms[k]];
                    fvec4 posVec(atomPos);
                    fvec4 delta1 = offset-posVec;
                    fvec4 delta2 = delta1+fvec4(voxelSizeX, voxelSizeY, 0, 0);
                    if (usePeriodic) {
                        delta1 -= round(delta1*invBoxSize)*boxSize;
                        delta2 -= round(delta2*invBoxSize)*boxSize;
                    }
                    fvec4 delta = min(abs(delta1), abs(delta2));
                    float dx = (x == atomVoxelIndex[k].x ? 0.0f : delta[0]);
                    float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
                    float dist2 = maxDistanceSquared-dx*dx-dy*dy;
                    if (dist2 > 0) {
                        float dist = sqrtf(dist2);
                        minz = min(minz, atomPos[2]-dist);
                        maxz = max(maxz, atomPos[2]+dist);
                    }
                }
peastman's avatar
peastman committed
232
233
                bool needPeriodic = (centerPos[0]-blockWidth[0] < maxDistance || centerPos[0]+blockWidth[0] > periodicBoxSize[0]-maxDistance ||
                                     centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
234
                                     minz < 0.0f || maxz > periodicBoxSize[2]);
235
236
                int rangeStart[2];
                int rangeEnd[2];
237
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, minz);
238
239
                if (needPeriodic) {
                    numRanges = 2;
240
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, maxz);
241
242
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
243
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, maxz-periodicBoxSize[2]), rangeStart[0]);
244
245
                    }
                    else {
246
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, minz+periodicBoxSize[2]), rangeEnd[0]);
247
248
249
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
250
251
                else {
                    numRanges = 1;
252
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, maxz);
253
                }
254
255
256
257
258
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
peastman's avatar
peastman committed
259
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second;
260

peastman's avatar
peastman committed
261
                        // Avoid duplicate entries.
262
                        if (sortedIndex >= lastSortedIndex)
263
                            continue;
264
                        
peastman's avatar
peastman committed
265
                        fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
266
                        fvec4 delta = atomPos-centerPos;
267
                        if (needPeriodic) {
268
269
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
270
                        }
271
                        delta = max(0.0f, abs(delta)-blockWidth);
272
                        float dSquared = dot3(delta, delta);
273
274
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
275
                        
276
277
278
279
280
281
282
283
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
284
                                if (needPeriodic) {
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
305
306
307
308
309
310
311
                    }
                }
            }
        }
    }

private:
312
313
314
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
315
316
    const float* periodicBoxSize;
    const bool usePeriodic;
peastman's avatar
peastman committed
317
    vector<vector<vector<pair<float, int> > > > bins;
318
319
};

320
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
321
public:
322
323
324
325
    ThreadTask(CpuNeighborList& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeNeighborList(threads, threadIndex);
326
327
328
    }
    CpuNeighborList& owner;
};
329

330
331
CpuNeighborList::CpuNeighborList() {
}
332

333
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
334
            const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads) {
335
336
337
338
339
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
340
    // Record the parameters for the threads.
341
    
342
343
344
345
346
347
348
349
350
351
352
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
353
    for (int i = 0; i < numAtoms; i++) {
354
355
356
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
357
    }
358
359
360
361
362
363
364
365
366
367
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
368
369
370
    ThreadTask task(*this);
    threads.execute(task);
    threads.waitForThreads();
371
372
    sort(atomBins.begin(), atomBins.end());

373
374
    // Build the voxel hash.
    
375
    float edgeSizeX, edgeSizeY;
376
    if (!usePeriodic)
377
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
378
    else {
379
380
        edgeSizeX = 0.6f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.6f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
381
    }
382
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
383
384
385
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
386
        voxels.insert(i, &atomLocations[4*atomIndex]);
387
    }
388
389
    voxels.sortItems();
    this->voxels = &voxels;
390
391
392
    
    // Signal the threads to start running and wait for them to finish.
    
393
394
    threads.resumeThreads();
    threads.waitForThreads();
395
    
396
    // Add padding atoms to fill up the last block.
397
    
398
399
400
401
402
403
404
405
406
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
407
408
}

409
410
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
411
412
}

413
414
415
416
417
418
419
420
421
422
423
424
425
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

426
427
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
428

429
430
431
432
433
434
435
436
437
438
439
440
441
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
442

443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
    for (int i = threadIndex; i < numBlocks; i += numThreads) {
        // Find the atoms in this block and compute their bounding box.
        
        int firstIndex = BlockSize*i;
        int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
        blockAtoms.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++)
            blockAtoms[j] = sortedAtoms[firstIndex+j];
        fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
            fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
461
        }
462
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
463

464
        // Record the exclusions for this block.
465

466
467
468
469
470
471
472
473
474
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
            for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                int atomIndex = blockNeighbors[i][k];
                if (atomExclusions.find(atomIndex) != atomExclusions.end())
                    blockExclusions[i][k] |= mask;
            }
        }
475
476
477
    }
}

478
} // namespace OpenMM