CpuNeighborList.cpp 17.5 KB
Newer Older
1
#include "CpuNeighborList.h"
2
#include "openmm/internal/hardware.h"
3
#include "openmm/internal/vectorize.h"
4
#include "hilbert.h"
5
#include <algorithm>
6
7
8
#include <set>
#include <map>
#include <cmath>
9
#include <smmintrin.h>
10
11
12
13
14

using namespace std;

namespace OpenMM {

15
16
const int CpuNeighborList::BlockSize = 4;

17
18
19
class VoxelIndex 
{
public:
20
    VoxelIndex(int x, int y) : x(x), y(y) {
21
    }
22
23
24
25
    int x;
    int y;
};

26
typedef pair<const float*, int> VoxelItem;
27

28
29
30
31
32
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
33
public:
34
35
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
36
37
38
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
39
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
40
            voxelSizeY = periodicBoxSize[1]/ny;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
55
56
57
        }
    }

58
59
60
61
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
62
        VoxelIndex voxelIndex = getVoxelIndex(location);
63
64
65
66
67
68
69
70
71
72
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], VoxelItem(location, atom)));
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
73
    }
74
    
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
        const vector<pair<float, VoxelItem> >& bin = bins[x][y];
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
        const vector<pair<float, VoxelItem> >& bin = bins[x][y];
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
108

109
110
111
    /**
     * Get the voxel index containing a particular location.
     */
112
    VoxelIndex getVoxelIndex(const float* location) const {
113
        float xperiodic, yperiodic;
114
        if (!usePeriodic) {
115
116
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
117
118
119
120
121
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
122
123
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
124
        
125
        return VoxelIndex(x, y);
126
127
    }

128
129
130
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
131
132
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
133
134
        
        float maxDistanceSquared = maxDistance * maxDistance;
135
136
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
137

138
139
        int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look?
        int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1);
140
141
142
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
143
144
145
146
147
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
148
        if (usePeriodic) {
149
150
151
152
153
154
155
156
157
158
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
            numRanges = 2;
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
            numRanges = 1;
159
        }
160
        int lastSortedIndex = BlockSize*(blockIndex+1);
161
162
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
163
164
165
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
166
167
            float dx = max(0.0f, voxelSizeX*max(0, abs(centerVoxelIndex.x-x)-1)-blockWidth[0]);
            for (int y = starty; y <= endy; ++y) {
168
169
170
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
                float dy = max(0.0f, voxelSizeY*max(0, abs(centerVoxelIndex.y-y)-1)-blockWidth[1]);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
                float dz = maxDistance+blockWidth[2];
                dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
                int rangeStart[2];
                int rangeEnd[2];
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
                if (usePeriodic) {
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz-periodicBoxSize[2]), rangeStart[0]);
                    }
                    else {
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz+periodicBoxSize[2]), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
                else
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second.second;
200

peastman's avatar
peastman committed
201
                        // Avoid duplicate entries.
202
                        if (sortedIndex >= lastSortedIndex)
203
                            continue;
204
                        
205
                        fvec4 atomPos(bins[voxelIndex.x][voxelIndex.y][item].second.first);
206
                        fvec4 delta = atomPos-centerPos;
207
                        if (usePeriodic) {
208
209
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
210
                        }
211
                        delta = max(0.0f, abs(delta)-blockWidth);
212
                        float dSquared = dot3(delta, delta);
213
214
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
215
                        
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
                                if (usePeriodic) {
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
245
246
247
248
249
250
251
                    }
                }
            }
        }
    }

private:
252
253
254
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
255
256
    const float* periodicBoxSize;
    const bool usePeriodic;
257
    vector<vector<vector<pair<float, VoxelItem> > > > bins;
258
259
};

260
261
262
263
264
265
266
class CpuNeighborList::ThreadData {
public:
    ThreadData(int index, CpuNeighborList& owner) : index(index), owner(owner) {
    }
    int index;
    CpuNeighborList& owner;
};
267

268
269
static void* threadBody(void* args) {
    CpuNeighborList::ThreadData& data = *reinterpret_cast<CpuNeighborList::ThreadData*>(args);
270
    data.owner.runThread(data.index);
271
272
273
274
275
276
    delete &data;
    return 0;
}

CpuNeighborList::CpuNeighborList() {
    isDeleted = false;
277
    numThreads = getNumProcessors();
278
279
280
281
    pthread_cond_init(&startCondition, NULL);
    pthread_cond_init(&endCondition, NULL);
    pthread_mutex_init(&lock, NULL);
    thread.resize(numThreads);
peastman's avatar
peastman committed
282
283
    pthread_mutex_lock(&lock);
    waitCount = 0;
284
285
286
287
288
    for (int i = 0; i < numThreads; i++) {
        ThreadData* data = new ThreadData(i, *this);
        threadData.push_back(data);
        pthread_create(&thread[i], NULL, threadBody, data);
    }
peastman's avatar
peastman committed
289
290
291
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
292
293
294
295
296
297
298
299
300
301
302
303
304
}

CpuNeighborList::~CpuNeighborList() {
    isDeleted = true;
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&startCondition);
    pthread_mutex_unlock(&lock);
    for (int i = 0; i < (int) thread.size(); i++)
        pthread_join(thread[i], NULL);
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&startCondition);
    pthread_cond_destroy(&endCondition);
}
305

306
307
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
            const float* periodicBoxSize, bool usePeriodic, float maxDistance) {
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
    // Sort the atoms based on a Hilbert curve.
    
    float minx = atomLocations[0], maxx = atomLocations[0];
    float miny = atomLocations[1], maxy = atomLocations[1];
    float minz = atomLocations[2], maxz = atomLocations[2];
    for (int i = 0; i < numAtoms; i++) {
        const float* pos = &atomLocations[4*i];
        if (pos[0] < minx)
            minx = pos[0];
        if (pos[1] < miny)
            miny = pos[1];
        if (pos[2] < minz)
            minz = pos[2];
        if (pos[0] > maxx)
            maxx = pos[0];
        if (pos[1] > maxy)
            maxy = pos[1];
        if (pos[2] > maxz)
            maxz = pos[2];
    }
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    vector<pair<int, int> > atomBins(numAtoms);
    bitmask_t coords[3];
    for (int i = 0; i < numAtoms; i++) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    sort(atomBins.begin(), atomBins.end());

347
348
    // Build the voxel hash.
    
349
    float edgeSizeX, edgeSizeY;
350
    if (!usePeriodic)
351
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
352
    else {
353
354
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
355
    }
356
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
357
358
359
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
360
        voxels.insert(i, &atomLocations[4*atomIndex]);
361
    }
362
    voxels.sortItems();
363
364
365
    
    // Record the parameters for the threads.
    
366
    this->voxels = &voxels;
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Signal the threads to start running and wait for them to finish.
    
    pthread_mutex_lock(&lock);
    waitCount = 0;
    pthread_cond_broadcast(&startCondition);
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
    
383
    // Add padding atoms to fill up the last block.
384
    
385
386
387
388
389
390
391
392
393
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
394
395
}

396
397
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
398
399
}

400
401
402
403
404
405
406
407
408
409
410
411
412
413
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

void CpuNeighborList::runThread(int index) {
414
415
416
417
418
419
420
421
422
423
424
425
426
    while (true) {
        // Wait for the signal to start running.
        
        pthread_mutex_lock(&lock);
        waitCount++;
        pthread_cond_signal(&endCondition);
        pthread_cond_wait(&startCondition, &lock);
        pthread_mutex_unlock(&lock);
        if (isDeleted)
            break;
        
        // Compute this thread's subset of neighbors.
        
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
        int numBlocks = blockNeighbors.size();
        vector<int> blockAtoms;
        for (int i = index; i < numBlocks; i += numThreads) {
            {
            int firstIndex = BlockSize*i;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            blockAtoms.resize(atomsInBlock);
            for (int j = 0; j < atomsInBlock; j++)
                blockAtoms[j] = sortedAtoms[firstIndex+j];
            }

                        
            int firstIndex = BlockSize*i;
            fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
            fvec4 maxPos = minPos;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            for (int j = 1; j < atomsInBlock; j++) {
                fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
                minPos = min(minPos, pos);
                maxPos = max(maxPos, pos);
            }
448
            voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
449
450
451
452
453
454
455
456
457
458
459
460
461
            
            // Record the exclusions for this block.
            
            for (int j = 0; j < atomsInBlock; j++) {
                const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
                char mask = 1<<j;
                for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                    int atomIndex = blockNeighbors[i][k];
                    if (atomExclusions.find(atomIndex) != atomExclusions.end())
                        blockExclusions[i][k] |= mask;
                }
            }
        }
462
463
464
    }
}

465
} // namespace OpenMM