customGBValueN2_cpu.cc 12.9 KB
Newer Older
1
2
3
/**
 * Compute a value based on pair interactions.
 */
4
KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigned int* RESTRICT exclusions,
5
        GLOBAL const int2* exclusionTiles,
6
        GLOBAL mm_ulong* RESTRICT global_value,
7
#ifdef USE_CUTOFF
8
9
10
        GLOBAL const int* RESTRICT tiles, GLOBAL const unsigned int* RESTRICT interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, GLOBAL const real4* RESTRICT blockCenter,
        GLOBAL const real4* RESTRICT blockSize, GLOBAL const int* RESTRICT interactingAtoms
11
12
13
14
#else
        unsigned int numTiles
#endif
        PARAMETER_ARGUMENTS) {
15
16
17
    LOCAL real3 local_pos[LOCAL_BUFFER_SIZE];
    LOCAL real local_value[LOCAL_BUFFER_SIZE];
    ATOM_PARAMETER_DATA
18
19
20

    // First loop: process tiles that contain exclusions.
    
21
22
    const int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
    const int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
23
    for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
24
        const int2 tileIndices = exclusionTiles[pos];
25
26
27
28
29
30
31
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;

        // Load the data for this tile.

        for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
            unsigned int j = y*TILE_SIZE + localAtomIndex;
32
            local_pos[localAtomIndex] = trimTo3(posq[j]);
33
34
35
36
37
38
39
40
41
42
43
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
        }
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
                real value = 0;
44
                real3 pos1 = trimTo3(posq[atom1]);
45
46
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
47
48
                    real3 pos2 = local_pos[j];
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
49
#ifdef USE_PERIODIC
50
                    APPLY_PERIODIC_TO_DELTA(delta)
51
52
#endif
                    real r2 = dot(delta.xyz, delta.xyz);
53
#ifdef USE_CUTOFF
54
55
56
                    if (r2 < CUTOFF_SQUARED) {
#endif
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
57
                        real r = r2*invR;
58
59
60
61
62
63
64
65
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        real tempValue1 = 0;
                        real tempValue2 = 0;
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
                        if (!isExcluded && atom1 != atom2) {
66
#else
67
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
68
#endif
69
70
71
                            COMPUTE_VALUE
                        }
                        value += tempValue1;
72
                        ADD_TEMP_DERIVS1
73
#ifdef USE_CUTOFF
74
                    }
75
76
#endif
#ifdef USE_EXCLUSIONS
77
                    excl >>= 1;
78
#endif
79
                }
80

81
                // Write results.
82

peastman's avatar
peastman committed
83
                unsigned int offset1 = atom1;
84
                ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
peastman's avatar
peastman committed
85
                STORE_PARAM_DERIVS1
86
87
            }
        }
88
89
        else {
            // This is an off-diagonal tile.
90

91
92
            for (int tgx = 0; tgx < TILE_SIZE; tgx++)
                local_value[tgx] = 0;
93
94
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
95
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
96
97
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
98
                real value = 0;
99
                real3 pos1 = trimTo3(posq[atom1]);
100
101
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
102
103
                    real3 pos2 = local_pos[j];
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
104
#ifdef USE_PERIODIC
105
                    APPLY_PERIODIC_TO_DELTA(delta)
106
#endif
107
                    real r2 = dot(delta.xyz, delta.xyz);
108
109
110
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
111
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
112
                        real r = r2*invR;
113
114
115
116
117
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        real tempValue1 = 0;
                        real tempValue2 = 0;
118
#ifdef USE_EXCLUSIONS
119
120
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
                        if (!isExcluded) {
121
#else
122
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
123
#endif
124
125
126
127
                            COMPUTE_VALUE
                        }
                        value += tempValue1;
                        local_value[j] += tempValue2;
128
129
                        ADD_TEMP_DERIVS1
                        ADD_TEMP_DERIVS2
130
131
132
133
134
135
136
137
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }

138
                // Write results for atom1.
139

140
                unsigned int offset1 = atom1;
141
                ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
142
                STORE_PARAM_DERIVS1
143
144
145
146
147
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
148
                unsigned int offset2 = y*TILE_SIZE+tgx;
149
                ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
150
                STORE_PARAM_DERIVS2
151
152
            }
        }
153
    }
154

155
156
    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).
157

158
159
#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
160
161
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
162
163
    int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
    int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
164
#else
165
166
    int pos = (int) (get_group_id(0)*(mm_long)numTiles/get_num_groups(0));
    int end = (int) ((get_group_id(0)+1)*(mm_long)numTiles/get_num_groups(0));
167
168
169
#endif
    int nextToSkip = -1;
    int currentSkipIndex = 0;
170
    LOCAL int atomIndices[TILE_SIZE];
171
172
173
174
175
176

    while (pos < end) {
        bool includeTile = true;
        
        // Extract the coordinates of this tile.
        
177
        int x, y;
178
179
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
180
181
182
183
184
185
186
187
188
189
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
190
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
191
        }
192

193
        // Skip over tiles that have exclusions, since they were already processed.
194

195
196
        while (nextToSkip < pos) {
            if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
197
                int2 tile = exclusionTiles[currentSkipIndex++];
198
                nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
199
            }
200
201
            else
                nextToSkip = end;
202
        }
203
204
        includeTile = (nextToSkip != pos);
#endif
205
206
207
208
209
        if (includeTile) {
            // Load the data for this tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
210
                unsigned int j = interactingAtoms[pos*TILE_SIZE+localAtomIndex];
211
212
213
214
215
#else
                unsigned int j = y*TILE_SIZE+localAtomIndex;
#endif
                atomIndices[localAtomIndex] = j;
                if (j < PADDED_NUM_ATOMS) {
216
                    local_pos[localAtomIndex] = trimTo3(posq[j]);
217
218
219
220
                    LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
                    local_value[localAtomIndex] = 0;
                }
            }
221
#ifdef USE_PERIODIC
222
223
224
225
226
227
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
228
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(local_pos[tgx], blockCenterX)
229
230
231
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    real value = 0;
232
233
                    real3 pos1 = trimTo3(posq[atom1]);
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
234
235
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
236
237
                        real3 pos2 = local_pos[j];
                        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
238
239
240
                        real r2 = dot(delta.xyz, delta.xyz);
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
241
                            real r = r2*invR;
242
243
244
245
246
247
248
249
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
                            real tempValue1 = 0;
                            real tempValue2 = 0;
                            COMPUTE_VALUE
                            value += tempValue1;
                            local_value[j] += tempValue2;
250
251
                            ADD_TEMP_DERIVS1
                            ADD_TEMP_DERIVS2
252
                        }
253
                    }
254

255
                    // Write results for atom1.
256

peastman's avatar
peastman committed
257
                    unsigned int offset1 = atom1;
258
                    ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
peastman's avatar
peastman committed
259
                    STORE_PARAM_DERIVS1
260
261
262
263
264
                }
            }
            else
#endif
            {
265
                // We need to apply periodic boundary conditions separately for each interaction.
266
267
268

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
269
                    real value = 0;
270
                    real3 pos1 = trimTo3(posq[atom1]);
271
272
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
273
274
                        real3 pos2 = local_pos[j];
                        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
275
#ifdef USE_PERIODIC
276
                        APPLY_PERIODIC_TO_DELTA(delta)
277
#endif
278
                        real r2 = dot(delta.xyz, delta.xyz);
279
#ifdef USE_CUTOFF
280
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
281
#else
282
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
283
#endif
284
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
285
                            real r = r2*invR;
286
287
288
289
290
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
                            real tempValue1 = 0;
                            real tempValue2 = 0;
291
                            COMPUTE_VALUE
292
293
                            value += tempValue1;
                            local_value[j] += tempValue2;
294
295
                            ADD_TEMP_DERIVS1
                            ADD_TEMP_DERIVS2
296
297
298
299
300
                        }
                    }

                    // Write results for atom1.

301
                    unsigned int offset1 = atom1;
302
                    ATOMIC_ADD(&global_value[offset1], (mm_ulong) realToFixedPoint(value));
303
                    STORE_PARAM_DERIVS1
304
305
306
                }
            }

307
            // Write results.
308
309

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
310
311
312
313
314
315
#ifdef USE_CUTOFF
                unsigned int atom2 = atomIndices[tgx];
#else
                unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
                if (atom2 < PADDED_NUM_ATOMS) {
316
                    unsigned int offset2 = atom2;
317
                    ATOMIC_ADD(&global_value[offset2], (mm_ulong) realToFixedPoint(local_value[tgx]));
318
                    STORE_PARAM_DERIVS2
319
                }
320
321
322
323
324
            }
        }
        pos++;
    }
}