CpuNeighborList.cpp 17.6 KB
Newer Older
1
#include "CpuNeighborList.h"
2
#include "openmm/internal/hardware.h"
3
#include "openmm/internal/vectorize.h"
4
#include "hilbert.h"
5
#include <algorithm>
6
7
8
#include <set>
#include <map>
#include <cmath>
9
#include <smmintrin.h>
10
11
12
13
14

using namespace std;

namespace OpenMM {

15
16
const int CpuNeighborList::BlockSize = 4;

17
18
19
class VoxelIndex 
{
public:
20
    VoxelIndex(int x, int y) : x(x), y(y) {
21
    }
22
23
24
25
    int x;
    int y;
};

26
typedef pair<const float*, int> VoxelItem;
27

28
29
30
31
32
/**
 * This data structure organizes the particles spatially.  It divides them into bins along the x and y axes,
 * then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
 */
class CpuNeighborList::Voxels {
33
public:
34
35
    Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
36
37
38
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
39
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
40
            voxelSizeY = periodicBoxSize[1]/ny;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
        }
        else {
            nx = max(1, (int) floorf((maxx-minx)/voxelSizeX+0.5f));
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
            if (maxx > minx)
                voxelSizeX = (maxx-minx)/nx;
            if (maxy > miny)
                voxelSizeY = (maxy-miny)/ny;
        }
        bins.resize(nx);
        for (int i = 0; i < nx; i++) {
            bins[i].resize(ny);
            for (int j = 0; j < ny; j++)
                bins[i][j].resize(0);
55
56
57
        }
    }

58
59
60
61
    /**
     * Insert a particle into the voxel data structure.
     */
    void insert(const int& atom, const float* location) {
62
        VoxelIndex voxelIndex = getVoxelIndex(location);
63
64
65
66
67
68
69
70
71
72
        bins[voxelIndex.x][voxelIndex.y].push_back(make_pair(location[2], VoxelItem(location, atom)));
    }
    
    /**
     * Sort the particles in each voxel by z coordinate.
     */
    void sortItems() {
        for (int i = 0; i < nx; i++)
            for (int j = 0; j < ny; j++)
                sort(bins[i][j].begin(), bins[i][j].end());
73
    }
74
    
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
     */
    int findLowerBound(int x, int y, double z) const {
        const vector<pair<float, VoxelItem> >& bin = bins[x][y];
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < z)
                lower = middle+1;
            else
                upper = middle;
        }
        return lower;
    }
    
    /**
     * Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
     */
    int findUpperBound(int x, int y, double z) const {
        const vector<pair<float, VoxelItem> >& bin = bins[x][y];
        int lower = 0;
        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > z)
                upper = middle;
            else
                lower = middle+1;
        }
        return upper;
    }
108

109
110
111
    /**
     * Get the voxel index containing a particular location.
     */
112
    VoxelIndex getVoxelIndex(const float* location) const {
113
        float xperiodic, yperiodic;
114
        if (!usePeriodic) {
115
116
            xperiodic = location[0]-minx;
            yperiodic = location[1]-miny;
117
118
119
120
121
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
        }
122
123
        int x = min(nx-1, int(floorf(xperiodic / voxelSizeX)));
        int y = min(ny-1, int(floorf(yperiodic / voxelSizeY)));
124
        
125
        return VoxelIndex(x, y);
126
127
    }

128
129
130
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
131
132
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
133
134
        
        float maxDistanceSquared = maxDistance * maxDistance;
135
136
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
137

138
139
        int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look?
        int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1);
140
141
142
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
143
144
145
146
147
        int startx = centerVoxelIndex.x-dIndexX;
        int starty = centerVoxelIndex.y-dIndexY;
        int endx = centerVoxelIndex.x+dIndexX;
        int endy = centerVoxelIndex.y+dIndexY;
        int numRanges;
148
        if (usePeriodic) {
149
150
151
152
153
154
155
156
157
158
            endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
            endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
            numRanges = 2;
        }
        else {
            startx = max(startx, 0);
            starty = max(starty, 0);
            endx = min(endx, nx-1);
            endy = min(endy, ny-1);
            numRanges = 1;
159
        }
160
        int lastSortedIndex = BlockSize*(blockIndex+1);
161
162
        VoxelIndex voxelIndex(0, 0);
        for (int x = startx; x <= endx; ++x) {
163
164
165
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
166
167
            float dx = max(0.0f, voxelSizeX*max(0, abs(centerVoxelIndex.x-x)-1)-blockWidth[0]);
            for (int y = starty; y <= endy; ++y) {
168
169
170
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
                float dy = max(0.0f, voxelSizeY*max(0, abs(centerVoxelIndex.y-y)-1)-blockWidth[1]);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
                // conditions, there may be two separate ranges.
                
                float dz = maxDistance+blockWidth[2];
                dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
                int rangeStart[2];
                int rangeEnd[2];
                rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
                if (usePeriodic) {
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                    if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
                        rangeEnd[1] = min(findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz-periodicBoxSize[2]), rangeStart[0]);
                    }
                    else {
                        rangeStart[1] = max(findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz+periodicBoxSize[2]), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
                    }
                }
                else
                    rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
                        const int sortedIndex = bins[voxelIndex.x][voxelIndex.y][item].second.second;
200

peastman's avatar
peastman committed
201
                        // Avoid duplicate entries.
202
                        if (sortedIndex >= lastSortedIndex)
203
                            continue;
204
                        
205
                        fvec4 atomPos(bins[voxelIndex.x][voxelIndex.y][item].second.first);
206
                        fvec4 delta = atomPos-centerPos;
207
                        if (usePeriodic) {
208
209
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
210
                        }
211
                        delta = max(0.0f, abs(delta)-blockWidth);
212
                        float dSquared = dot3(delta, delta);
213
214
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
215
                        
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
                                if (usePeriodic) {
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
245
246
247
248
249
250
251
                    }
                }
            }
        }
    }

private:
252
253
254
    float voxelSizeX, voxelSizeY;
    float minx, maxx, miny, maxy;
    int nx, ny;
255
256
    const float* periodicBoxSize;
    const bool usePeriodic;
257
    vector<vector<vector<pair<float, VoxelItem> > > > bins;
258
259
};

260
261
262
263
264
265
266
class CpuNeighborList::ThreadData {
public:
    ThreadData(int index, CpuNeighborList& owner) : index(index), owner(owner) {
    }
    int index;
    CpuNeighborList& owner;
};
267

268
269
static void* threadBody(void* args) {
    CpuNeighborList::ThreadData& data = *reinterpret_cast<CpuNeighborList::ThreadData*>(args);
270
    data.owner.runThread(data.index);
271
272
273
274
275
276
    delete &data;
    return 0;
}

CpuNeighborList::CpuNeighborList() {
    isDeleted = false;
277
    numThreads = getNumProcessors();
278
279
280
281
    pthread_cond_init(&startCondition, NULL);
    pthread_cond_init(&endCondition, NULL);
    pthread_mutex_init(&lock, NULL);
    thread.resize(numThreads);
peastman's avatar
peastman committed
282
283
    pthread_mutex_lock(&lock);
    waitCount = 0;
284
285
286
287
288
    for (int i = 0; i < numThreads; i++) {
        ThreadData* data = new ThreadData(i, *this);
        threadData.push_back(data);
        pthread_create(&thread[i], NULL, threadBody, data);
    }
peastman's avatar
peastman committed
289
290
291
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
292
293
294
295
296
297
298
299
300
301
302
303
304
}

CpuNeighborList::~CpuNeighborList() {
    isDeleted = true;
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&startCondition);
    pthread_mutex_unlock(&lock);
    for (int i = 0; i < (int) thread.size(); i++)
        pthread_join(thread[i], NULL);
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&startCondition);
    pthread_cond_destroy(&endCondition);
}
305

306
307
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
            const float* periodicBoxSize, bool usePeriodic, float maxDistance) {
308
309
310
311
312
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
313
    // Record the parameters for the threads.
314
    
315
316
317
318
319
320
321
322
323
324
325
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Identify the range of atom positions along each axis.
    
    fvec4 minPos(&atomLocations[0]);
    fvec4 maxPos = minPos;
326
    for (int i = 0; i < numAtoms; i++) {
327
328
329
        fvec4 pos(&atomLocations[4*i]);
        minPos = min(minPos, pos);
        maxPos = max(maxPos, pos);
330
    }
331
332
333
334
335
336
337
338
339
340
341
342
    minx = minPos[0];
    maxx = maxPos[0];
    miny = minPos[1];
    maxy = maxPos[1];
    minz = minPos[2];
    maxz = maxPos[2];
    
    // Sort the atoms based on a Hilbert curve.
    
    atomBins.resize(numAtoms);
    pthread_mutex_lock(&lock);
    advanceThreads();
343
344
    sort(atomBins.begin(), atomBins.end());

345
346
    // Build the voxel hash.
    
347
    float edgeSizeX, edgeSizeY;
348
    if (!usePeriodic)
349
        edgeSizeX = edgeSizeY = maxDistance; // TODO - adjust this as needed
350
    else {
351
352
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
353
    }
354
    Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
355
356
357
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
358
        voxels.insert(i, &atomLocations[4*atomIndex]);
359
    }
360
361
    voxels.sortItems();
    this->voxels = &voxels;
362
363
364
    
    // Signal the threads to start running and wait for them to finish.
    
365
    advanceThreads();
366
367
    pthread_mutex_unlock(&lock);
    
368
    // Add padding atoms to fill up the last block.
369
    
370
371
372
373
374
375
376
377
378
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
379
380
}

381
382
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
383
384
}

385
386
387
388
389
390
391
392
393
394
395
396
397
398
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

void CpuNeighborList::runThread(int index) {
399
400
401
    while (true) {
        // Wait for the signal to start running.
        
402
        threadWait();
403
404
405
        if (isDeleted)
            break;
        
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
        // Compute the positions of atoms along the Hilbert curve.

        float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
        float invBinWidth = 1.0f/binWidth;
        bitmask_t coords[3];
        for (int i = index; i < numAtoms; i += numThreads) {
            const float* pos = &atomLocations[4*i];
            coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
            coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
            coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
            int bin = (int) hilbert_c2i(3, 8, coords);
            atomBins[i] = pair<int, int>(bin, i);
        }
        threadWait();
        
421
422
        // Compute this thread's subset of neighbors.
        
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
        int numBlocks = blockNeighbors.size();
        vector<int> blockAtoms;
        for (int i = index; i < numBlocks; i += numThreads) {
            {
            int firstIndex = BlockSize*i;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            blockAtoms.resize(atomsInBlock);
            for (int j = 0; j < atomsInBlock; j++)
                blockAtoms[j] = sortedAtoms[firstIndex+j];
            }

                        
            int firstIndex = BlockSize*i;
            fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
            fvec4 maxPos = minPos;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            for (int j = 1; j < atomsInBlock; j++) {
                fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
                minPos = min(minPos, pos);
                maxPos = max(maxPos, pos);
            }
444
            voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
445
446
447
448
449
450
451
452
453
454
455
456
457
            
            // Record the exclusions for this block.
            
            for (int j = 0; j < atomsInBlock; j++) {
                const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
                char mask = 1<<j;
                for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                    int atomIndex = blockNeighbors[i][k];
                    if (atomExclusions.find(atomIndex) != atomExclusions.end())
                        blockExclusions[i][k] |= mask;
                }
            }
        }
458
459
460
    }
}

461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
void CpuNeighborList::threadWait() {
    pthread_mutex_lock(&lock);
    waitCount++;
    pthread_cond_signal(&endCondition);
    pthread_cond_wait(&startCondition, &lock);
    pthread_mutex_unlock(&lock);
}

void CpuNeighborList::advanceThreads() {
    waitCount = 0;
    pthread_cond_broadcast(&startCondition);
    while (waitCount < numThreads) {
        pthread_cond_wait(&endCondition, &lock);
    }
}

477
} // namespace OpenMM