CpuNeighborList.cpp 18.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2013 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
 * Permission is hereby granted, free of charge, to any person obtaining a    *
 * copy of this software and associated documentation files (the "Software"), *
 * to deal in the Software without restriction, including without limitation  *
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
 * and/or sell copies of the Software, and to permit persons to whom the      *
 * Software is furnished to do so, subject to the following conditions:       *
 *                                                                            *
 * The above copyright notice and this permission notice shall be included in *
 * all copies or substantial portions of the Software.                        *
 *                                                                            *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

32
#include "CpuNeighborList.h"
33
#include "openmm/internal/hardware.h"
34
#include "openmm/internal/vectorize.h"
35
#include "hilbert.h"
36
#include <algorithm>
37
38
39
#include <set>
#include <map>
#include <cmath>
40
#include <smmintrin.h>
41
42
43
44
45

using namespace std;

namespace OpenMM {

46
47
const int CpuNeighborList::BlockSize = 4;

48
49
50
class VoxelIndex 
{
public:
51
    VoxelIndex(int x, int y) : x(x), y(y) {
52
    }
53
54
55
56
    int x;
    int y;
};

57
58
59
60
61
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
62
public:
63
64
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
65
66
67
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
68
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
69
            voxelSizeY = periodicBoxSize[1]/ny;
70
71
72
73
74
75
76
77
78
79
80
81
82
83
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
84
85
86
        }
    }

87
88
89
90
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
91
        VoxelIndex voxelIndex = getVoxelIndex(location);
peastman's avatar
peastman committed
92
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], atom));
93
94
95
96
97
98
99
100
101
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
102
    }
103
    
104
105
106
107
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
peastman's avatar
peastman committed
108
        const vector<pair<float, int> >& bin = bins[x][y];
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
peastman's avatar
peastman committed
125
        const vector<pair<float, int> >& bin = bins[x][y];
126
127
128
129
130
131
132
133
134
135
136
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
137

138
139
140
    /**
     * Get the voxel index containing a particular location.
     */
141
    VoxelIndex getVoxelIndex(const float* location) const {
142
        float xperiodic, yperiodic;
143
        if (!usePeriodic) {
144
145
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
146
147
148
149
150
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
151
152
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
153
        
154
        return VoxelIndex(x, y);
155
156
    }

157
158
159
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
160
161
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
162
163
        
        float maxDistanceSquared = maxDistance * maxDistance;
164
165
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
166

167
168
        int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look?
        int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1);
169
170
171
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
172
173
174
175
176
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
177
        if (usePeriodic) {
178
179
180
181
182
183
184
185
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
186
        }
187
        int lastSortedIndex = BlockSize*(blockIndex+1);
188
189
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
190
191
192
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
193
194
            float dx = max(0.0f, voxelSizeX*max(0, abs(centerVoxelIndex.x-x)-1)-blockWidth[0]);
            for (int y = starty; y <= endy; ++y) {
195
196
197
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
198
199
200
201
202
203
204
                float dy = max(0.0f, voxelSizeY*max(0, abs(centerVoxelIndex.y-y)-1)-blockWidth[1]);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
                float dz = maxDistance+blockWidth[2];
                dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
205
                bool needPeriodic = (voxelIndex.x != x || voxelIndex.y != y || centerPos[2]-dz < 0.0f || centerPos[2]+dz > periodicBoxSize[2]);
206
207
208
                int rangeStart[2];
                int rangeEnd[2];
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
209
210
                if (needPeriodic) {
                    numRanges = 2;
211
212
213
214
215
216
217
218
219
220
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz-periodicBoxSize[2]), rangeStart[0]);
                    }
                    else {
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz+periodicBoxSize[2]), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
221
222
                else {
                    numRanges = 1;
223
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
224
                }
225
226
227
228
229
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
peastman's avatar
peastman committed
230
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second;
231

peastman's avatar
peastman committed
232
                        // Avoid duplicate entries.
233
                        if (sortedIndex >= lastSortedIndex)
234
                            continue;
235
                        
peastman's avatar
peastman committed
236
                        fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
237
                        fvec4 delta = atomPos-centerPos;
238
                        if (needPeriodic) {
239
240
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
241
                        }
242
                        delta = max(0.0f, abs(delta)-blockWidth);
243
                        float dSquared = dot3(delta, delta);
244
245
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
246
                        
247
248
249
250
251
252
253
254
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
255
                                if (needPeriodic) {
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
276
277
278
279
280
281
282
                    }
                }
            }
        }
    }

private:
283
284
285
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
286
287
    const float* periodicBoxSize;
    const bool usePeriodic;
peastman's avatar
peastman committed
288
    vector<vector<vector<pair<float, int> > > > bins;
289
290
};

291
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
292
public:
293
294
295
296
    ThreadTask(CpuNeighborList& owner) : owner(owner) {
    }
    void execute(ThreadPool& threads, int threadIndex) {
        owner.threadComputeNeighborList(threads, threadIndex);
297
298
299
    }
    CpuNeighborList& owner;
};
300

301
302
CpuNeighborList::CpuNeighborList() {
}
303

304
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
305
            const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads) {
306
307
308
309
310
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
311
    // Record the parameters for the threads.
312
    
313
314
315
316
317
318
319
320
321
322
323
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
324
    for (int i = 0; i < numAtoms; i++) {
325
326
327
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
328
    }
329
330
331
332
333
334
335
336
337
338
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
339
340
341
    ThreadTask task(*this);
    threads.execute(task);
    threads.waitForThreads();
342
343
    sort(atomBins.begin(), atomBins.end());

344
345
    // Build the voxel hash.
    
346
    float edgeSizeX, edgeSizeY;
347
    if (!usePeriodic)
348
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
349
    else {
350
351
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
352
    }
353
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
354
355
356
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
357
        voxels.insert(i, &atomLocations[4*atomIndex]);
358
    }
359
360
    voxels.sortItems();
    this->voxels = &voxels;
361
362
363
    
    // Signal the threads to start running and wait for them to finish.
    
364
365
    threads.resumeThreads();
    threads.waitForThreads();
366
    
367
    // Add padding atoms to fill up the last block.
368
    
369
370
371
372
373
374
375
376
377
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
378
379
}

380
381
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
382
383
}

384
385
386
387
388
389
390
391
392
393
394
395
396
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

397
398
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
    // Compute the positions of atoms along the Hilbert curve.
399

400
401
402
403
404
405
406
407
408
409
410
411
412
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    bitmask_t coords[3];
    int numThreads = threads.getNumThreads();
    for (int i = threadIndex; i < numAtoms; i += numThreads) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    threads.syncThreads();
413

414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
    // Compute this thread's subset of neighbors.

    int numBlocks = blockNeighbors.size();
    vector<int> blockAtoms;
    for (int i = threadIndex; i < numBlocks; i += numThreads) {
        // Find the atoms in this block and compute their bounding box.
        
        int firstIndex = BlockSize*i;
        int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
        blockAtoms.resize(atomsInBlock);
        for (int j = 0; j < atomsInBlock; j++)
            blockAtoms[j] = sortedAtoms[firstIndex+j];
        fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
        fvec4 maxPos = minPos;
        for (int j = 1; j < atomsInBlock; j++) {
            fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
432
        }
433
        voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
434

435
        // Record the exclusions for this block.
436

437
438
439
440
441
442
443
444
445
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
            for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                int atomIndex = blockNeighbors[i][k];
                if (atomExclusions.find(atomIndex) != atomExclusions.end())
                    blockExclusions[i][k] |= mask;
            }
        }
446
447
448
    }
}

449
} // namespace OpenMM