CpuNeighborList.cpp 14.8 KB
Newer Older
1
#include "CpuNeighborList.h"
2
#include "openmm/internal/hardware.h"
3
#include "openmm/internal/vectorize.h"
4
#include "hilbert.h"
5
#include <algorithm>
6
7
8
#include <set>
#include <map>
#include <cmath>
9
#include <smmintrin.h>
10
11
12
13
14

using namespace std;

namespace OpenMM {

15
16
const int CpuNeighborList::BlockSize = 4;

17
18
19
class VoxelIndex 
{
public:
20
21
    VoxelIndex(int xx, int yy, int zz) : x(xx), y(yy), z(zz) {
    }
22
23
24
25
26
27
28
29
30
31

    // operator<() needed for map
    bool operator<(const VoxelIndex& other) const {
        if      (x < other.x) return true;
        else if (x > other.x) return false;
        else if (y < other.y) return true;
        else if (y > other.y) return false;
        else if (z < other.z) return true;
        else return false;
    }
32
    
33
34
35
36
37
    int x;
    int y;
    int z;
};

38
39
typedef pair<const float*, int> VoxelItem;
typedef vector< VoxelItem > Voxel;
40

41
class CpuNeighborList::VoxelHash {
42
43
44
45
46
47
48
public:
    VoxelHash(float vsx, float vsy, float vsz, const float* periodicBoxSize, bool usePeriodic) :
            voxelSizeX(vsx), voxelSizeY(vsy), voxelSizeZ(vsz), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
        if (usePeriodic) {
            nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
            ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
            nz = (int) floorf(periodicBoxSize[2]/voxelSizeZ+0.5f);
49
            voxelSizeX = periodicBoxSize[0]/nx;
peastman's avatar
Bug fix  
peastman committed
50
51
            voxelSizeY = periodicBoxSize[1]/ny;
            voxelSizeZ = periodicBoxSize[2]/nz;
52
53
54
        }
    }

55
    void insert(const int& item, const float* location) {
56
        VoxelIndex voxelIndex = getVoxelIndex(location);
57
58
        if (voxelMap.find(voxelIndex) == voxelMap.end())
            voxelMap[voxelIndex] = Voxel(); 
59
60
61
        Voxel& voxel = voxelMap.find(voxelIndex)->second;
        voxel.push_back(VoxelItem(location, item));
    }
62
    
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

    VoxelIndex getVoxelIndex(const float* location) const {
        float xperiodic, yperiodic, zperiodic;
        if (!usePeriodic) {
            xperiodic = location[0];
            yperiodic = location[1];
            zperiodic = location[2];
        }
        else {
            xperiodic = location[0]-periodicBoxSize[0]*floorf(location[0]/periodicBoxSize[0]);
            yperiodic = location[1]-periodicBoxSize[1]*floorf(location[1]/periodicBoxSize[1]);
            zperiodic = location[2]-periodicBoxSize[2]*floorf(location[2]/periodicBoxSize[2]);
        }
        int x = int(floorf(xperiodic / voxelSizeX));
        int y = int(floorf(yperiodic / voxelSizeY));
        int z = int(floorf(zperiodic / voxelSizeZ));
        
        return VoxelIndex(x, y, z);
    }

83
84
85
    void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
        neighbors.resize(0);
        exclusions.resize(0);
86
87
        fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
        fvec4 invBoxSize(1/periodicBoxSize[0], 1/periodicBoxSize[1], 1/periodicBoxSize[2], 0);
88
89
        
        float maxDistanceSquared = maxDistance * maxDistance;
90
91
        float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
        float refineCutoffSquared = refineCutoff*refineCutoff;
92

93
94
95
96
97
98
        int dIndexX = int((maxDistance+blockWidth[0])/voxelSizeX) + 1; // How may voxels away do we have to look?
        int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY) + 1;
        int dIndexZ = int((maxDistance+blockWidth[2])/voxelSizeZ) + 1;
        float centerPos[4];
        blockCenter.store(centerPos);
        VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
99
100
101
102
103
104
105
106
        int lastx = centerVoxelIndex.x+dIndexX;
        int lasty = centerVoxelIndex.y+dIndexY;
        int lastz = centerVoxelIndex.z+dIndexZ;
        if (usePeriodic) {
            lastx = min(lastx, centerVoxelIndex.x-dIndexX+nx-1);
            lasty = min(lasty, centerVoxelIndex.y-dIndexY+ny-1);
            lastz = min(lastz, centerVoxelIndex.z-dIndexZ+nz-1);
        }
107
        int lastSortedIndex = BlockSize*(blockIndex+1);
108
        VoxelIndex voxelIndex(0, 0, 0);
109
        for (int x = centerVoxelIndex.x - dIndexX; x <= lastx; ++x) {
110
111
112
            voxelIndex.x = x;
            if (usePeriodic)
                voxelIndex.x = (x < 0 ? x+nx : (x >= nx ? x-nx : x));
113
            for (int y = centerVoxelIndex.y - dIndexY; y <= lasty; ++y) {
114
115
116
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
117
                for (int z = centerVoxelIndex.z - dIndexZ; z <= lastz; ++z) {
118
119
120
                    voxelIndex.z = z;
                    if (usePeriodic)
                        voxelIndex.z = (z < 0 ? z+nz : (z >= nz ? z-nz : z));
121
                    const map<VoxelIndex, Voxel>::const_iterator voxelEntry = voxelMap.find(voxelIndex);
122
123
                    if (voxelEntry == voxelMap.end())
                        continue; // no such voxel; skip
124
125
                    const Voxel& voxel = voxelEntry->second;
                    for (Voxel::const_iterator itemIter = voxel.begin(); itemIter != voxel.end(); ++itemIter) {
126
                        const int sortedIndex = itemIter->second;
127

peastman's avatar
peastman committed
128
                        // Avoid duplicate entries.
129
                        if (sortedIndex >= lastSortedIndex)
peastman's avatar
peastman committed
130
                            break;
131
                        
132
133
                        fvec4 atomPos(itemIter->first);
                        fvec4 delta = atomPos-centerPos;
134
                        if (usePeriodic) {
135
136
                            fvec4 base = round(delta*invBoxSize)*boxSize;
                            delta = delta-base;
137
                        }
138
                        delta = max(0.0f, abs(delta)-blockWidth);
139
                        float dSquared = dot3(delta, delta);
140
141
                        if (dSquared > maxDistanceSquared)
                            continue;
peastman's avatar
peastman committed
142
                        
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
                        if (dSquared > refineCutoffSquared) {
                            // The distance is large enough that there might not be any actual interactions.
                            // Check individual atom pairs to be sure.
                            
                            bool any = false;
                            for (int k = 0; k < (int) blockAtoms.size(); k++) {
                                fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
                                delta = atomPos-pos1;
                                if (usePeriodic) {
                                    fvec4 base = round(delta*invBoxSize)*boxSize;
                                    delta = delta-base;
                                }
                                float r2 = dot3(delta, delta);
                                if (r2 < maxDistanceSquared) {
                                    any = true;
                                    break;
                                }
                            }
                            if (!any)
                                continue;
                        }
                        
                        // Add this atom to the list of neighbors.
                        
                        neighbors.push_back(sortedAtoms[sortedIndex]);
                        if (sortedIndex < BlockSize*blockIndex)
                            exclusions.push_back(0);
                        else
                            exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
172
173
174
175
176
177
178
179
180
181
182
                    }
                }
            }
        }
    }

private:
    float voxelSizeX, voxelSizeY, voxelSizeZ;
    int nx, ny, nz;
    const float* periodicBoxSize;
    const bool usePeriodic;
183
    map<VoxelIndex, Voxel> voxelMap;
184
185
};

186
187
188
189
190
191
192
class CpuNeighborList::ThreadData {
public:
    ThreadData(int index, CpuNeighborList& owner) : index(index), owner(owner) {
    }
    int index;
    CpuNeighborList& owner;
};
193

194
195
static void* threadBody(void* args) {
    CpuNeighborList::ThreadData& data = *reinterpret_cast<CpuNeighborList::ThreadData*>(args);
196
    data.owner.runThread(data.index);
197
198
199
200
201
202
    delete &data;
    return 0;
}

CpuNeighborList::CpuNeighborList() {
    isDeleted = false;
203
    numThreads = getNumProcessors();
204
205
206
207
    pthread_cond_init(&startCondition, NULL);
    pthread_cond_init(&endCondition, NULL);
    pthread_mutex_init(&lock, NULL);
    thread.resize(numThreads);
peastman's avatar
peastman committed
208
209
    pthread_mutex_lock(&lock);
    waitCount = 0;
210
211
212
213
214
    for (int i = 0; i < numThreads; i++) {
        ThreadData* data = new ThreadData(i, *this);
        threadData.push_back(data);
        pthread_create(&thread[i], NULL, threadBody, data);
    }
peastman's avatar
peastman committed
215
216
217
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
218
219
220
221
222
223
224
225
226
227
228
229
230
}

CpuNeighborList::~CpuNeighborList() {
    isDeleted = true;
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&startCondition);
    pthread_mutex_unlock(&lock);
    for (int i = 0; i < (int) thread.size(); i++)
        pthread_join(thread[i], NULL);
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&startCondition);
    pthread_cond_destroy(&endCondition);
}
231

232
233
void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& atomLocations, const vector<set<int> >& exclusions,
            const float* periodicBoxSize, bool usePeriodic, float maxDistance) {
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
    int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
    blockNeighbors.resize(numBlocks);
    blockExclusions.resize(numBlocks);
    sortedAtoms.resize(numAtoms);
    
    // Sort the atoms based on a Hilbert curve.
    
    float minx = atomLocations[0], maxx = atomLocations[0];
    float miny = atomLocations[1], maxy = atomLocations[1];
    float minz = atomLocations[2], maxz = atomLocations[2];
    for (int i = 0; i < numAtoms; i++) {
        const float* pos = &atomLocations[4*i];
        if (pos[0] < minx)
            minx = pos[0];
        if (pos[1] < miny)
            miny = pos[1];
        if (pos[2] < minz)
            minz = pos[2];
        if (pos[0] > maxx)
            maxx = pos[0];
        if (pos[1] > maxy)
            maxy = pos[1];
        if (pos[2] > maxz)
            maxz = pos[2];
    }
    float binWidth = max(max(maxx-minx, maxy-miny), maxz-minz)/255.0f;
    float invBinWidth = 1.0f/binWidth;
    vector<pair<int, int> > atomBins(numAtoms);
    bitmask_t coords[3];
    for (int i = 0; i < numAtoms; i++) {
        const float* pos = &atomLocations[4*i];
        coords[0] = (bitmask_t) ((pos[0]-minx)*invBinWidth);
        coords[1] = (bitmask_t) ((pos[1]-miny)*invBinWidth);
        coords[2] = (bitmask_t) ((pos[2]-minz)*invBinWidth);
        int bin = (int) hilbert_c2i(3, 8, coords);
        atomBins[i] = pair<int, int>(bin, i);
    }
    sort(atomBins.begin(), atomBins.end());

273
274
    // Build the voxel hash.
    
275
276
277
278
    float edgeSizeX, edgeSizeY, edgeSizeZ;
    if (!usePeriodic)
        edgeSizeX = edgeSizeY = edgeSizeZ = maxDistance; // TODO - adjust this as needed
    else {
279
280
281
        edgeSizeX = 0.5f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
        edgeSizeY = 0.5f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
        edgeSizeZ = 0.5f*periodicBoxSize[2]/floorf(periodicBoxSize[2]/maxDistance);
282
283
    }
    VoxelHash voxelHash(edgeSizeX, edgeSizeY, edgeSizeZ, periodicBoxSize, usePeriodic);
284
285
286
287
288
    for (int i = 0; i < numAtoms; i++) {
        int atomIndex = atomBins[i].second;
        sortedAtoms[i] = atomIndex;
        voxelHash.insert(i, &atomLocations[4*atomIndex]);
    }
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
    
    // Record the parameters for the threads.
    
    this->voxelHash = &voxelHash;
    this->exclusions = &exclusions;
    this->atomLocations = &atomLocations[0];
    this->periodicBoxSize = periodicBoxSize;
    this->numAtoms = numAtoms;
    this->usePeriodic = usePeriodic;
    this->maxDistance = maxDistance;
    
    // Signal the threads to start running and wait for them to finish.
    
    pthread_mutex_lock(&lock);
    waitCount = 0;
    pthread_cond_broadcast(&startCondition);
    while (waitCount < numThreads)
        pthread_cond_wait(&endCondition, &lock);
    pthread_mutex_unlock(&lock);
    
309
    // Add padding atoms to fill up the last block.
310
    
311
312
313
314
315
316
317
318
319
    int numPadding = numBlocks*BlockSize-numAtoms;
    if (numPadding > 0) {
        char mask = (0xF0 >> numPadding) & 0xF;
        for (int i = 0; i < numPadding; i++)
            sortedAtoms.push_back(0);
        vector<char>& exc = blockExclusions[blockExclusions.size()-1];
        for (int i = 0; i < (int) exc.size(); i++)
            exc[i] |= mask;
    }
320
321
}

322
323
int CpuNeighborList::getNumBlocks() const {
    return sortedAtoms.size()/BlockSize;
324
325
}

326
327
328
329
330
331
332
333
334
335
336
337
338
339
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
    return sortedAtoms;
}

const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const {
    return blockNeighbors[blockIndex];
}

const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
    return blockExclusions[blockIndex];
    
}

void CpuNeighborList::runThread(int index) {
340
341
342
343
344
345
346
347
348
349
350
351
352
    while (true) {
        // Wait for the signal to start running.
        
        pthread_mutex_lock(&lock);
        waitCount++;
        pthread_cond_signal(&endCondition);
        pthread_cond_wait(&startCondition, &lock);
        pthread_mutex_unlock(&lock);
        if (isDeleted)
            break;
        
        // Compute this thread's subset of neighbors.
        
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
        int numBlocks = blockNeighbors.size();
        vector<int> blockAtoms;
        for (int i = index; i < numBlocks; i += numThreads) {
            {
            int firstIndex = BlockSize*i;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            blockAtoms.resize(atomsInBlock);
            for (int j = 0; j < atomsInBlock; j++)
                blockAtoms[j] = sortedAtoms[firstIndex+j];
            }

                        
            int firstIndex = BlockSize*i;
            fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
            fvec4 maxPos = minPos;
            int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
            for (int j = 1; j < atomsInBlock; j++) {
                fvec4 pos(&atomLocations[4*sortedAtoms[firstIndex+j]]);
                minPos = min(minPos, pos);
                maxPos = max(maxPos, pos);
            }
            voxelHash->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
            
            // Record the exclusions for this block.
            
            for (int j = 0; j < atomsInBlock; j++) {
                const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
                char mask = 1<<j;
                for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
                    int atomIndex = blockNeighbors[i][k];
                    if (atomExclusions.find(atomIndex) != atomExclusions.end())
                        blockExclusions[i][k] |= mask;
                }
            }
        }
388
389
390
    }
}

391
} // namespace OpenMM