findInteractingBlocks_cpu.cl 10.5 KB
Newer Older
1
2
3
4
5
6
7
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#define BUFFER_SIZE BUFFER_GROUPS*GROUP_SIZE

/**
 * Find a bounding box for the atoms in each block.
 */
8
9
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList,
10
        __global real2* restrict sortedBlocks) {
11
12
13
    int index = get_global_id(0);
    int base = index*TILE_SIZE;
    while (base < numAtoms) {
14
        real4 pos = posq[base];
15
#ifdef USE_PERIODIC
16
        APPLY_PERIODIC_TO_POS(pos)
17
#endif
18
19
        real4 minPos = pos;
        real4 maxPos = pos;
20
21
22
23
        int last = min(base+TILE_SIZE, numAtoms);
        for (int i = base+1; i < last; i++) {
            pos = posq[i];
#ifdef USE_PERIODIC
24
25
            real4 center = 0.5f*(maxPos+minPos);
            APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
26
27
28
29
#endif
            minPos = min(minPos, pos);
            maxPos = max(maxPos, pos);
        }
30
31
        real4 blockSize = 0.5f*(maxPos-minPos);
        blockBoundingBox[index] = blockSize;
32
        blockCenter[index] = 0.5f*(maxPos+minPos);
33
        sortedBlocks[index] = (real2) (blockSize.x+blockSize.y+blockSize.z, index);
34
35
36
37
        index += get_global_size(0);
        base = index*TILE_SIZE;
    }
    if (get_global_id(0) == 0)
38
39
40
41
42
43
44
45
46
        rebuildNeighborList[0] = 0;
}

/**
 * Sort the data about bounding boxes so it can be accessed more efficiently in the next kernel.
 */
__kernel void sortBoxData(__global const real2* restrict sortedBlock, __global const real4* restrict blockCenter,
        __global const real4* restrict blockBoundingBox, __global real4* restrict sortedBlockCenter,
        __global real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, __global const real4* restrict oldPositions,
47
        __global unsigned int* restrict interactionCount, __global int* restrict rebuildNeighborList, int forceRebuild) {
48
49
50
51
52
53
54
55
    for (int i = get_global_id(0); i < NUM_BLOCKS; i += get_global_size(0)) {
        int index = (int) sortedBlock[i].y;
        sortedBlockCenter[i] = blockCenter[index];
        sortedBlockBoundingBox[i] = blockBoundingBox[index];
    }
    
    // Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.

56
    bool rebuild = forceRebuild;
57
58
59
60
61
62
63
    for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
        real4 delta = oldPositions[i]-posq[i];
        if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING)
            rebuild = true;
    }
    if (rebuild) {
        rebuildNeighborList[0] = 1;
64
        interactionCount[0] = 0;
65
    }
66
67
68
69
70
71
}

/**
 * This is called by findBlocksWithInteractions().  It compacts the list of blocks and writes them
 * to global memory.
 */
72
void storeInteractionData(int x, int* buffer, int* atoms, int* numAtoms, int numValid, __global unsigned int* interactionCount,
73
74
            __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX,
            real4 periodicBoxVecY, real4 periodicBoxVecZ, __global const real4* posq, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
75
76
77
78
79
80
81
82
83
84
85
    real4 posBuffer[TILE_SIZE];
    const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
                                     0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
                                     0.5f*periodicBoxSize.z-blockSizeX.z >= PADDED_CUTOFF);
    for (int i = 0; i < TILE_SIZE; i++) {
        real4 pos = posq[x*TILE_SIZE+i];
#ifdef USE_PERIODIC
        if (singlePeriodicCopy) {
            // The box is small enough that we can just translate all the atoms into a single periodic
            // box, then skip having to apply periodic boundary conditions later.
            
86
            APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
87
        }
88
89
90
#endif
        posBuffer[i] = pos;
    }
91

92
    // Loop over the tiles and find specific interactions in them.
93

94
95
96
97
98
99
    for (int tile = 0; tile < numValid; tile++) {
        for (int indexInTile = 0; indexInTile < TILE_SIZE; indexInTile++) {
            // Check each atom in block Y for interactions.
            
            int atom = buffer[tile]*TILE_SIZE+indexInTile;
            real4 pos = posq[atom];
100
#ifdef USE_PERIODIC
101
            if (singlePeriodicCopy)
102
		APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
103
#endif
104
            bool interacts = false;
105
#ifdef USE_PERIODIC
106
107
108
            if (!singlePeriodicCopy) {
                for (int j = 0; j < TILE_SIZE && !interacts; j++) {
                    real4 delta = pos-posBuffer[j];
109
                    APPLY_PERIODIC_TO_DELTA(delta)
110
111
112
113
                    interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
                }
            }
            else {
114
#endif
115
116
117
118
119
120
121
122
123
124
125
126
127
                for (int j = 0; j < TILE_SIZE && !interacts; j++) {
                    real4 delta = pos-posBuffer[j];
                    interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
                }
#ifdef USE_PERIODIC
            }
#endif
            if (interacts)
                atoms[(*numAtoms)++] = atom;
            if (*numAtoms == BUFFER_SIZE) {
                // The atoms buffer is full, so store it to global memory.
                
                int tilesToStore = BUFFER_SIZE/TILE_SIZE;
128
                int baseIndex = ATOMIC_ADD(interactionCount, tilesToStore);
129
130
                if (baseIndex+tilesToStore <= maxTiles) {
                    for (int i = 0; i < tilesToStore; i++) {
peastman's avatar
peastman committed
131
                        interactingTiles[baseIndex+i] = x;
132
133
134
135
136
137
                        for (int j = 0; j < TILE_SIZE; j++)
                            interactingAtoms[(baseIndex+i)*TILE_SIZE+j] = atoms[i*TILE_SIZE+j];
                    }
                }
                *numAtoms = 0;
            }
138
139
        }
    }
140
141
142
143
144
    
    if (*numAtoms > 0 && finish) {
        // There are some leftover atoms, so save them now.
        
        int tilesToStore = (*numAtoms+TILE_SIZE-1)/TILE_SIZE;
145
        int baseIndex = ATOMIC_ADD(interactionCount, tilesToStore);
146
147
        if (baseIndex+tilesToStore <= maxTiles) {
            for (int i = 0; i < tilesToStore; i++) {
peastman's avatar
peastman committed
148
                interactingTiles[baseIndex+i] = x;
149
150
151
152
153
                for (int j = 0; j < TILE_SIZE; j++) {
                    int index = i*TILE_SIZE+j;
                    interactingAtoms[(baseIndex+i)*TILE_SIZE+j] = (index < *numAtoms ? atoms[index] : NUM_ATOMS);
                }
            }
154
        }
155
    }
156
157
158
159
160
161
}

/**
 * Compare the bounding boxes for each pair of blocks.  If they are sufficiently far apart,
 * mark them as non-interacting.
 */
162
163
164
165
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
        __global unsigned int* restrict interactionCount, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms,
        __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, __global real2* restrict sortedBlocks,
        __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
166
167
168
169
        __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
        __global const int* restrict rebuildNeighborList) {
    if (rebuildNeighborList[0] == 0)
        return; // The neighbor list doesn't need to be rebuilt.
170
    int buffer[BUFFER_SIZE];
171
172
173
174
175
176
177
178
179
180
181
    int atoms[BUFFER_SIZE];
    int exclusionsForX[MAX_EXCLUSIONS];
    int valuesInBuffer;
    int numAtoms;
    
    // Loop over blocks sorted by size.
    
    for (int i = startBlockIndex+get_group_id(0); i < startBlockIndex+numBlocks; i += get_num_groups(0)) {
        valuesInBuffer = 0;
        numAtoms = 0;
        real2 sortedKey = sortedBlocks[i];
182
        int x = (int) sortedKey.y;
peastman's avatar
peastman committed
183
184
        real4 blockCenterX = sortedBlockCenter[i];
        real4 blockSizeX = sortedBlockBoundingBox[i];
185

186
187
188
189
190
191
192
193
194
195
196
197
        // Load exclusion data for block x.
        
        const int exclusionStart = exclusionRowIndices[x];
        const int exclusionEnd = exclusionRowIndices[x+1];
        const int numExclusions = exclusionEnd-exclusionStart;
        for (int j = 0; j < numExclusions; j++)
            exclusionsForX[j] = exclusionIndices[exclusionStart+j];
        
        // Compare it to other blocks after this one in sorted order.
        
        for (int j = i+1; j < NUM_BLOCKS; j++) {
            real2 sortedKey2 = sortedBlocks[j];
198
            int y = (int) sortedKey2.y;
199
200
201
202
203
204
205
206
            bool hasExclusions = false;
            for (int k = 0; k < numExclusions; k++)
                hasExclusions |= (exclusionsForX[k] == y);
            if (hasExclusions)
                continue;
            real4 blockCenterY = sortedBlockCenter[j];
            real4 blockSizeY = sortedBlockBoundingBox[j];
            real4 delta = blockCenterX-blockCenterY;
207
#ifdef USE_PERIODIC
208
            APPLY_PERIODIC_TO_DELTA(delta)
209
#endif
210
211
212
213
214
            delta.x = max((real) 0, fabs(delta.x)-blockSizeX.x-blockSizeY.x);
            delta.y = max((real) 0, fabs(delta.y)-blockSizeX.y-blockSizeY.y);
            delta.z = max((real) 0, fabs(delta.z)-blockSizeX.z-blockSizeY.z);
            if (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED) {
                // Add this tile to the buffer.
215

216
217
                buffer[valuesInBuffer++] = y;
                if (valuesInBuffer == BUFFER_SIZE) {
218
                    storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, false);
219
220
                    valuesInBuffer = 0;
                }
221
222
            }
        }
223
        storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, true);
224
    }
225
226
227
228
229
    
    // Record the positions the neighbor list is based on.
    
    for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0))
        oldPositions[i] = posq[i];
230
}