CpuNeighborList.cpp 18.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2013 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
#include <set>
#include <map>
#include <cmath>
40
#include <smmintrin.h>
41
42
43
44
45

using namespace std;

namespace OpenMM {

46
47
const int CpuNeighborList::BlockSize = 4;

48
49
50
class VoxelIndex 
{
public:
51
    VoxelIndex(int x, int y) : x(x), y(y) {
52
    }
53
54
55
56
    int x;
    int y;
};

57
58
59
60
61
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
62
public:
63
64
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
65
66
67
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
68
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
69
            voxelSizeY = periodicBoxSize[1]/ny;
70
71
72
73
74
75
76
77
78
79
80
81
82
83
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
84
85
86
        }
    }

87
88
89
90
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
91
        VoxelIndex voxelIndex = getVoxelIndex(location);
peastman's avatar
peastman committed
92
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], atom));
93
94
95
96
97
98
99
100
101
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
102
    }
103
    
104
105
106
107
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
peastman's avatar
peastman committed
108
        const vector<pair<float, int> >& bin = bins[x][y];
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
peastman's avatar
peastman committed
125
        const vector<pair<float, int> >& bin = bins[x][y];
126
127
128
129
130
131
132
133
134
135
136
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
137

138
139
140
    /**
     * Get the voxel index containing a particular location.
     */
141
    VoxelIndex getVoxelIndex(const float* location) const {
142
        float xperiodic, yperiodic;
143
        if (!usePeriodic) {
144
145
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
146
147
148
149
150
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
151
152
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
153
        
154
        return VoxelIndex(x, y);
155
156
    }

157
158
159
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
160
161
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
162
163
        
        float maxDistanceSquared = maxDistance * maxDistance;
164
165
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
166

167
168
        int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look?
        int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1);
169
170
171
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
172
173
174
175
176
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
177
        if (usePeriodic) {
178
179
180
181
182
183
184
185
186
187
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
            numRanges = 2;
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
            numRanges = 1;
188
        }
189
        int lastSortedIndex = BlockSize*(blockIndex+1);
190
191
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
192
193
194
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
195
196
            float dx = max(0.0f, voxelSizeX*max(0, abs(centerVoxelIndex.x-x)-1)-blockWidth[0]);
            for (int y = starty; y <= endy; ++y) {
197
198
199
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
                float dy = max(0.0f, voxelSizeY*max(0, abs(centerVoxelIndex.y-y)-1)-blockWidth[1]);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
                float dz = maxDistance+blockWidth[2];
                dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
                int rangeStart[2];
                int rangeEnd[2];
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
                if (usePeriodic) {
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz-periodicBoxSize[2]), rangeStart[0]);
                    }
                    else {
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz+periodicBoxSize[2]), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
                else
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
peastman's avatar
peastman committed
228
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second;
229

peastman's avatar
peastman committed
230
                        // Avoid duplicate entries.
231
                        if (sortedIndex >= lastSortedIndex)
232
                            continue;
233
                        
peastman's avatar
peastman committed
234
                        fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
235
                        fvec4 delta = atomPos-centerPos;
236
                        if (usePeriodic) {
237
238
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
239
                        }
240
                        delta = max(0.0f, abs(delta)-blockWidth);
241
                        float dSquared = dot3(delta, delta);
242
243
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
244
                        
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
                                if (usePeriodic) {
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
274
275
276
277
278
279
280
                    }
                }
            }
        }
    }

private:
281
282
283
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
284
285
    const float* periodicBoxSize;
    const bool usePeriodic;
peastman's avatar
peastman committed
286
    vector<vector<vector<pair<float, int> > > > bins;
287
288
};

289
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
290
public:
291
292
293
294
    ThreadTask(CpuNeighborList& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeNeighborList(threads, threadIndex);
295
296
297
    }
    CpuNeighborList& owner;
};
298

299
300
CpuNeighborList::CpuNeighborList() {
}
301

302
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
303
            const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads) {
304
305
306
307
308
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
309
    // Record the parameters for the threads.
310
    
311
312
313
314
315
316
317
318
319
320
321
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
322
    for (int i = 0; i < numAtoms; i++) {
323
324
325
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
326
    }
327
328
329
330
331
332
333
334
335
336
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
337
338
339
    ThreadTask task(*this);
    threads.execute(task);
    threads.waitForThreads();
340
341
    sort(atomBins.begin(), atomBins.end());

342
343
    // Build the voxel hash.
    
344
    float edgeSizeX, edgeSizeY;
345
    if (!usePeriodic)
346
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
347
    else {
348
349
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
350
    }
351
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
352
353
354
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
355
        voxels.insert(i, &atomLocations[4*atomIndex]);
356
    }
357
358
    voxels.sortItems();
    this->voxels = &voxels;
359
360
361
    
    // Signal the threads to start running and wait for them to finish.
    
362
363
    threads.resumeThreads();
    threads.waitForThreads();
364
    
365
    // Add padding atoms to fill up the last block.
366
    
367
368
369
370
371
372
373
374
375
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
376
377
}

378
379
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
380
381
}

382
383
384
385
386
387
388
389
390
391
392
393
394
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

395
396
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
397

398
399
400
401
402
403
404
405
406
407
408
409
410
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
411

412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
    for (int i = threadIndex; i < numBlocks; i += numThreads) {
        // Find the atoms in this block and compute their bounding box.
        
        int firstIndex = BlockSize*i;
        int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
        blockAtoms.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++)
            blockAtoms[j] = sortedAtoms[firstIndex+j];
        fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
            fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
430
        }
431
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
432

433
        // Record the exclusions for this block.
434

435
436
437
438
439
440
441
442
443
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
            for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                int atomIndex = blockNeighbors[i][k];
                if (atomExclusions.find(atomIndex) != atomExclusions.end())
                    blockExclusions[i][k] |= mask;
            }
        }
444
445
446
    }
}

447
} // namespace OpenMM