CpuNeighborList.cpp 17.5 KB
Newer Older
1
#include "CpuNeighborList.h"
2
#include "openmm/internal/hardware.h"
3
#include "openmm/internal/vectorize.h"
4
#include "hilbert.h"
5
#include <algorithm>
6
7
8
#include <set>
#include <map>
#include <cmath>
9
#include <smmintrin.h>
10
11
12
13
14

using namespace std;

namespace OpenMM {

15
16
const int CpuNeighborList::BlockSize = 4;

17
18
19
class VoxelIndex 
{
public:
20
    VoxelIndex(int x, int y) : x(x), y(y) {
21
    }
22
23
24
25
    int x;
    int y;
};

26
27
28
29
30
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
31
public:
32
33
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
34
35
36
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
37
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
38
            voxelSizeY = periodicBoxSize[1]/ny;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
53
54
55
        }
    }

56
57
58
59
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
60
        VoxelIndex voxelIndex = getVoxelIndex(location);
peastman's avatar
peastman committed
61
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], atom));
62
63
64
65
66
67
68
69
70
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
71
    }
72
    
73
74
75
76
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
peastman's avatar
peastman committed
77
        const vector<pair<float, int> >& bin = bins[x][y];
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
peastman's avatar
peastman committed
94
        const vector<pair<float, int> >& bin = bins[x][y];
95
96
97
98
99
100
101
102
103
104
105
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
106

107
108
109
    /**
     * Get the voxel index containing a particular location.
     */
110
    VoxelIndex getVoxelIndex(const float* location) const {
111
        float xperiodic, yperiodic;
112
        if (!usePeriodic) {
113
114
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
115
116
117
118
119
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
120
121
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
122
        
123
        return VoxelIndex(x, y);
124
125
    }

126
127
128
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
129
130
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
131
132
        
        float maxDistanceSquared = maxDistance * maxDistance;
133
134
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
135

136
137
        int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look?
        int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1);
138
139
140
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
141
142
143
144
145
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
146
        if (usePeriodic) {
147
148
149
150
151
152
153
154
155
156
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
            numRanges = 2;
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
            numRanges = 1;
157
        }
158
        int lastSortedIndex = BlockSize*(blockIndex+1);
159
160
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
161
162
163
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
164
165
            float dx = max(0.0f, voxelSizeX*max(0, abs(centerVoxelIndex.x-x)-1)-blockWidth[0]);
            for (int y = starty; y <= endy; ++y) {
166
167
168
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
                float dy = max(0.0f, voxelSizeY*max(0, abs(centerVoxelIndex.y-y)-1)-blockWidth[1]);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
                float dz = maxDistance+blockWidth[2];
                dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
                int rangeStart[2];
                int rangeEnd[2];
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
                if (usePeriodic) {
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz-periodicBoxSize[2]), rangeStart[0]);
                    }
                    else {
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz+periodicBoxSize[2]), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
                else
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
peastman's avatar
peastman committed
197
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second;
198

peastman's avatar
peastman committed
199
                        // Avoid duplicate entries.
200
                        if (sortedIndex >= lastSortedIndex)
201
                            continue;
202
                        
peastman's avatar
peastman committed
203
                        fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
204
                        fvec4 delta = atomPos-centerPos;
205
                        if (usePeriodic) {
206
207
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
208
                        }
209
                        delta = max(0.0f, abs(delta)-blockWidth);
210
                        float dSquared = dot3(delta, delta);
211
212
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
213
                        
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
                                if (usePeriodic) {
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
243
244
245
246
247
248
249
                    }
                }
            }
        }
    }

private:
250
251
252
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
253
254
    const float* periodicBoxSize;
    const bool usePeriodic;
peastman's avatar
peastman committed
255
    vector<vector<vector<pair<float, int> > > > bins;
256
257
};

258
259
260
261
262
263
264
class CpuNeighborList::ThreadData {
public:
    ThreadData(int index, CpuNeighborList& owner) : index(index), owner(owner) {
    }
    int index;
    CpuNeighborList& owner;
};
265

266
267
static void* threadBody(void* args) {
    CpuNeighborList::ThreadData& data = *reinterpret_cast<CpuNeighborList::ThreadData*>(args);
268
    data.owner.runThread(data.index);
269
270
271
272
273
274
    delete &data;
    return 0;
}

CpuNeighborList::CpuNeighborList() {
    isDeleted = false;
275
    numThreads = getNumProcessors();
276
277
278
279
    pthread_cond_init(&startCondition, NULL);
    pthread_cond_init(&endCondition, NULL);
    pthread_mutex_init(&lock, NULL);
    thread.resize(numThreads);
peastman's avatar
peastman committed
280
281
    pthread_mutex_lock(&lock);
    waitCount = 0;
282
283
284
285
286
    for (int i = 0; i < numThreads; i++) {
        ThreadData* data = new ThreadData(i, *this);
        threadData.push_back(data);
        pthread_create(&thread[i], NULL, threadBody, data);
    }
peastman's avatar
peastman committed
287
288
289
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
290
291
292
293
294
295
296
297
298
299
300
301
302
}

CpuNeighborList::~CpuNeighborList() {
    isDeleted = true;
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&startCondition);
    pthread_mutex_unlock(&lock);
    for (int i = 0; i < (int) thread.size(); i++)
        pthread_join(thread[i], NULL);
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&startCondition);
    pthread_cond_destroy(&endCondition);
}
303

304
305
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
            const float* periodicBoxSize, bool usePeriodic, float maxDistance) {
306
307
308
309
310
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
311
    // Record the parameters for the threads.
312
    
313
314
315
316
317
318
319
320
321
322
323
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
324
    for (int i = 0; i < numAtoms; i++) {
325
326
327
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
328
    }
329
330
331
332
333
334
335
336
337
338
339
340
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
    pthread_mutex_lock(&lock);
    advanceThreads();
341
342
    sort(atomBins.begin(), atomBins.end());

343
344
    // Build the voxel hash.
    
345
    float edgeSizeX, edgeSizeY;
346
    if (!usePeriodic)
347
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
348
    else {
349
350
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
351
    }
352
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
353
354
355
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
356
        voxels.insert(i, &atomLocations[4*atomIndex]);
357
    }
358
359
    voxels.sortItems();
    this->voxels = &voxels;
360
361
362
    
    // Signal the threads to start running and wait for them to finish.
    
363
    advanceThreads();
364
365
    pthread_mutex_unlock(&lock);
    
366
    // Add padding atoms to fill up the last block.
367
    
368
369
370
371
372
373
374
375
376
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
377
378
}

379
380
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
381
382
}

383
384
385
386
387
388
389
390
391
392
393
394
395
396
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

void CpuNeighborList::runThread(int index) {
397
398
399
    while (true) {
        // Wait for the signal to start running.
        
400
        threadWait();
401
402
403
        if (isDeleted)
            break;
        
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
        // Compute the positions of atoms along the Hilbert curve.

        float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
        float invBinWidth = 1.0f/binWidth;
        bitmask_t coords[3];
        for (int i = index; i < numAtoms; i += numThreads) {
            const float* pos = &atomLocations[4*i];
            coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
            coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
            coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
            int bin = (int) hilbert_c2i(3, 8, coords);
            atomBins[i] = pair<int, int>(bin, i);
        }
        threadWait();
        
419
420
        // Compute this thread's subset of neighbors.
        
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
        int numBlocks = blockNeighbors.size();
        vector<int> blockAtoms;
        for (int i = index; i < numBlocks; i += numThreads) {
            {
            int firstIndex = BlockSize*i;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            blockAtoms.resize(atomsInBlock);
            for (int j = 0; j < atomsInBlock; j++)
                blockAtoms[j] = sortedAtoms[firstIndex+j];
            }

                        
            int firstIndex = BlockSize*i;
            fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
            fvec4 maxPos = minPos;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            for (int j = 1; j < atomsInBlock; j++) {
                fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
                minPos = min(minPos, pos);
                maxPos = max(maxPos, pos);
            }
442
            voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
443
444
445
446
447
448
449
450
451
452
453
454
455
            
            // Record the exclusions for this block.
            
            for (int j = 0; j < atomsInBlock; j++) {
                const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
                char mask = 1<<j;
                for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                    int atomIndex = blockNeighbors[i][k];
                    if (atomExclusions.find(atomIndex) != atomExclusions.end())
                        blockExclusions[i][k] |= mask;
                }
            }
        }
456
457
458
    }
}

459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
void CpuNeighborList::threadWait() {
    pthread_mutex_lock(&lock);
    waitCount++;
    pthread_cond_signal(&endCondition);
    pthread_cond_wait(&startCondition, &lock);
    pthread_mutex_unlock(&lock);
}

void CpuNeighborList::advanceThreads() {
    waitCount = 0;
    pthread_cond_broadcast(&startCondition);
    while (waitCount < numThreads) {
        pthread_cond_wait(&endCondition, &lock);
    }
}

475
} // namespace OpenMM