findInteractingBlocks_cpu.cl 13.2 KB
Newer Older
1
2
3
4
5
6
7
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#define BUFFER_SIZE BUFFER_GROUPS*GROUP_SIZE

/**
 * Find a bounding box for the atoms in each block.
 */
8
9
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList,
10
        __global real2* restrict blockSizeRange) {
11
12
    int index = get_global_id(0);
    int base = index*TILE_SIZE;
13
    real minSize = 1e38, maxSize = 0;
14
    while (base < numAtoms) {
15
        real4 pos = posq[base];
16
#ifdef USE_PERIODIC
17
        APPLY_PERIODIC_TO_POS(pos)
18
#endif
19
20
        real4 minPos = pos;
        real4 maxPos = pos;
21
22
23
24
        int last = min(base+TILE_SIZE, numAtoms);
        for (int i = base+1; i < last; i++) {
            pos = posq[i];
#ifdef USE_PERIODIC
25
26
            real4 center = 0.5f*(maxPos+minPos);
            APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
27
28
29
30
#endif
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
        }
31
        real4 blockSize = 0.5f*(maxPos-minPos);
32
33
34
35
36
37
38
39
40
41
42
        real4 center = 0.5f*(maxPos+minPos);
        center.w = 0;
        for (int i = base; i < last; i++) {
            pos = posq[i];
            real4 delta = posq[i]-center;
#ifdef USE_PERIODIC
            APPLY_PERIODIC_TO_DELTA(delta)
#endif
            center.w = max(center.w, delta.x*delta.x+delta.y*delta.y+delta.z*delta.z);
        }
        center.w = sqrt(center.w);
43
        blockBoundingBox[index] = blockSize;
44
45
46
47
        blockCenter[index] = center;
        real totalSize = blockSize.x+blockSize.y+blockSize.z;
        minSize = min(minSize, totalSize);
        maxSize = max(maxSize, totalSize);
48
49
50
        index += get_global_size(0);
        base = index*TILE_SIZE;
    }
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

    // Record the range of sizes seen by threads in this block.

    __local real minBuffer[64], maxBuffer[64];
    minBuffer[get_local_id(0)] = minSize;
    maxBuffer[get_local_id(0)] = maxSize;
    barrier(CLK_LOCAL_MEM_FENCE);
    for (int step = 1; step < 64; step *= 2) {
        if (get_local_id(0)+step < 64 && get_local_id(0)%(2*step) == 0) {
            minBuffer[get_local_id(0)] = min(minBuffer[get_local_id(0)], minBuffer[get_local_id(0)+step]);
            maxBuffer[get_local_id(0)] = max(maxBuffer[get_local_id(0)], maxBuffer[get_local_id(0)+step]);
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    if (get_local_id(0) == 0)
        blockSizeRange[get_group_id(0)] = make_real2(minBuffer[0], maxBuffer[0]);
67
    if (get_global_id(0) == 0)
68
69
70
        rebuildNeighborList[0] = 0;
}

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
__kernel void computeSortKeys(__global const real4* restrict blockBoundingBox, __global unsigned int* restrict sortedBlocks, __global real2* restrict blockSizeRange, int numSizes) {
    // Find the total range of sizes recorded by all blocks.

    __local real2 sizeRange;
    if (get_local_id(0) == 0) {
        sizeRange = blockSizeRange[0];
        for (int i = 1; i < numSizes; i++) {
            real2 size = blockSizeRange[i];
            sizeRange.x = min(sizeRange.x, size.x);
            sizeRange.y = max(sizeRange.y, size.y);
        }
        sizeRange.x = LOG(sizeRange.x);
        sizeRange.y = LOG(sizeRange.y);
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Sort keys store the bin in the high order part and the block in the low
    // order part.

    int numSizeBins = 20;
    real scale = numSizeBins/(sizeRange.y-sizeRange.x);
    for (unsigned int i = get_global_id(0); i < NUM_BLOCKS; i += get_global_size(0)) {
        real4 box = blockBoundingBox[i];
        real size = LOG(box.x+box.y+box.z);
        int bin = (size-sizeRange.x)*scale;
        bin = max(0, min(bin, numSizeBins-1));
        sortedBlocks[i] = (((unsigned int) bin)<<BIN_SHIFT) + i;
    }
}

101
102
103
/**
 * Sort the data about bounding boxes so it can be accessed more efficiently in the next kernel.
 */
104
__kernel void sortBoxData(__global const unsigned int* restrict sortedBlocks, __global const real4* restrict blockCenter,
105
106
        __global const real4* restrict blockBoundingBox, __global real4* restrict sortedBlockCenter,
        __global real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, __global const real4* restrict oldPositions,
107
108
109
110
111
112
        __global unsigned int* restrict interactionCount, __global int* restrict rebuildNeighborList, int forceRebuild
#ifdef USE_LARGE_BLOCKS
        , __global real4* restrict largeBlockCenter, __global real4* restrict largeBlockBoundingBox, real4 periodicBoxSize,
        real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#endif
        ) {
113
    for (int i = get_global_id(0); i < NUM_BLOCKS; i += get_global_size(0)) {
114
        unsigned int index = sortedBlocks[i] & BLOCK_INDEX_MASK;
115
116
117
118
119
120
        sortedBlockCenter[i] = blockCenter[index];
        sortedBlockBoundingBox[i] = blockBoundingBox[index];
    }
    
    // Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.

121
    bool rebuild = forceRebuild;
122
123
124
125
126
127
128
    for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
        real4 delta = oldPositions[i]-posq[i];
        if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING)
            rebuild = true;
    }
    if (rebuild) {
        rebuildNeighborList[0] = 1;
129
        interactionCount[0] = 0;
130
    }
131
132
133
134
135
136
}

/**
 * This is called by findBlocksWithInteractions().  It compacts the list of blocks and writes them
 * to global memory.
 */
137
void storeInteractionData(int x, int* buffer, int* atoms, int* numAtoms, int numValid, __global unsigned int* interactionCount,
138
139
            __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX,
            real4 periodicBoxVecY, real4 periodicBoxVecZ, __global const real4* posq, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
140
141
142
143
144
145
146
147
148
149
150
    real4 posBuffer[TILE_SIZE];
    const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
                                     0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
                                     0.5f*periodicBoxSize.z-blockSizeX.z >= PADDED_CUTOFF);
    for (int i = 0; i < TILE_SIZE; i++) {
        real4 pos = posq[x*TILE_SIZE+i];
#ifdef USE_PERIODIC
        if (singlePeriodicCopy) {
            // The box is small enough that we can just translate all the atoms into a single periodic
            // box, then skip having to apply periodic boundary conditions later.
            
151
            APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
152
        }
153
154
155
#endif
        posBuffer[i] = pos;
    }
156

157
    // Loop over the tiles and find specific interactions in them.
158

159
160
161
162
163
164
    for (int tile = 0; tile < numValid; tile++) {
        for (int indexInTile = 0; indexInTile < TILE_SIZE; indexInTile++) {
            // Check each atom in block Y for interactions.
            
            int atom = buffer[tile]*TILE_SIZE+indexInTile;
            real4 pos = posq[atom];
165
#ifdef USE_PERIODIC
166
            if (singlePeriodicCopy)
167
		APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
168
#endif
169
            bool interacts = false;
170
#ifdef USE_PERIODIC
171
172
173
            if (!singlePeriodicCopy) {
                for (int j = 0; j < TILE_SIZE && !interacts; j++) {
                    real4 delta = pos-posBuffer[j];
174
                    APPLY_PERIODIC_TO_DELTA(delta)
175
176
177
178
                    interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
                }
            }
            else {
179
#endif
180
181
182
183
184
185
186
187
188
189
190
191
192
                for (int j = 0; j < TILE_SIZE && !interacts; j++) {
                    real4 delta = pos-posBuffer[j];
                    interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
                }
#ifdef USE_PERIODIC
            }
#endif
            if (interacts)
                atoms[(*numAtoms)++] = atom;
            if (*numAtoms == BUFFER_SIZE) {
                // The atoms buffer is full, so store it to global memory.
                
                int tilesToStore = BUFFER_SIZE/TILE_SIZE;
193
                int baseIndex = ATOMIC_ADD(interactionCount, tilesToStore);
194
195
                if (baseIndex+tilesToStore <= maxTiles) {
                    for (int i = 0; i < tilesToStore; i++) {
peastman's avatar
peastman committed
196
                        interactingTiles[baseIndex+i] = x;
197
198
199
200
201
202
                        for (int j = 0; j < TILE_SIZE; j++)
                            interactingAtoms[(baseIndex+i)*TILE_SIZE+j] = atoms[i*TILE_SIZE+j];
                    }
                }
                *numAtoms = 0;
            }
203
204
        }
    }
205
206
207
208
209
    
    if (*numAtoms > 0 && finish) {
        // There are some leftover atoms, so save them now.
        
        int tilesToStore = (*numAtoms+TILE_SIZE-1)/TILE_SIZE;
210
        int baseIndex = ATOMIC_ADD(interactionCount, tilesToStore);
211
212
        if (baseIndex+tilesToStore <= maxTiles) {
            for (int i = 0; i < tilesToStore; i++) {
peastman's avatar
peastman committed
213
                interactingTiles[baseIndex+i] = x;
214
215
216
217
218
                for (int j = 0; j < TILE_SIZE; j++) {
                    int index = i*TILE_SIZE+j;
                    interactingAtoms[(baseIndex+i)*TILE_SIZE+j] = (index < *numAtoms ? atoms[index] : NUM_ATOMS);
                }
            }
219
        }
220
    }
221
222
223
224
225
226
}

/**
 * Compare the bounding boxes for each pair of blocks.  If they are sufficiently far apart,
 * mark them as non-interacting.
 */
227
228
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global unsigned int* restrict interactionCount, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms,
229
        __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, __global unsigned int* restrict sortedBlocks,
230
        __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
231
        __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
232
233
234
235
236
        __global const int* restrict rebuildNeighborList
#ifdef USE_LARGE_BLOCKS
        , __global real4* restrict largeBlockCenter, __global real4* restrict largeBlockBoundingBox
#endif
        ) {
237
238
    if (rebuildNeighborList[0] == 0)
        return; // The neighbor list doesn't need to be rebuilt.
239
    int buffer[BUFFER_SIZE];
240
241
242
243
244
245
246
247
248
249
    int atoms[BUFFER_SIZE];
    int exclusionsForX[MAX_EXCLUSIONS];
    int valuesInBuffer;
    int numAtoms;
    
    // Loop over blocks sorted by size.
    
    for (int i = startBlockIndex+get_group_id(0); i < startBlockIndex+numBlocks; i += get_num_groups(0)) {
        valuesInBuffer = 0;
        numAtoms = 0;
250
        int x = sortedBlocks[i] & BLOCK_INDEX_MASK;
peastman's avatar
peastman committed
251
252
        real4 blockCenterX = sortedBlockCenter[i];
        real4 blockSizeX = sortedBlockBoundingBox[i];
253

254
255
256
257
258
259
260
261
262
263
264
265
        // Load exclusion data for block x.
        
        const int exclusionStart = exclusionRowIndices[x];
        const int exclusionEnd = exclusionRowIndices[x+1];
        const int numExclusions = exclusionEnd-exclusionStart;
        for (int j = 0; j < numExclusions; j++)
            exclusionsForX[j] = exclusionIndices[exclusionStart+j];
        
        // Compare it to other blocks after this one in sorted order.
        
        for (int j = i+1; j < NUM_BLOCKS; j++) {
            real2 sortedKey2 = sortedBlocks[j];
266
            int y = sortedBlocks[j] & BLOCK_INDEX_MASK;
267
268
269
270
271
272
273
274
            bool hasExclusions = false;
            for (int k = 0; k < numExclusions; k++)
                hasExclusions |= (exclusionsForX[k] == y);
            if (hasExclusions)
                continue;
            real4 blockCenterY = sortedBlockCenter[j];
            real4 blockSizeY = sortedBlockBoundingBox[j];
            real4 delta = blockCenterX-blockCenterY;
275
#ifdef USE_PERIODIC
276
            APPLY_PERIODIC_TO_DELTA(delta)
277
#endif
278
279
280
281
282
            delta.x = max((real) 0, fabs(delta.x)-blockSizeX.x-blockSizeY.x);
            delta.y = max((real) 0, fabs(delta.y)-blockSizeX.y-blockSizeY.y);
            delta.z = max((real) 0, fabs(delta.z)-blockSizeX.z-blockSizeY.z);
            if (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED) {
                // Add this tile to the buffer.
283

284
285
                buffer[valuesInBuffer++] = y;
                if (valuesInBuffer == BUFFER_SIZE) {
286
                    storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, false);
287
288
                    valuesInBuffer = 0;
                }
289
290
            }
        }
291
        storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, true);
292
    }
293
294
295
296
297
    
    // Record the positions the neighbor list is based on.
    
    for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0))
        oldPositions[i] = posq[i];
298
}