customGBEnergyN2_cpu.cc 15.9 KB
Newer Older
1
2
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
3
4
5
6

/**
 * Compute a force based on pair interactions.
 */
7
8
9
10
KERNEL void computeN2Energy(
        GLOBAL mm_ulong* RESTRICT forceBuffers,
        GLOBAL mixed* RESTRICT energyBuffer,
        GLOBAL const real4* RESTRICT posq, GLOBAL const unsigned int* RESTRICT exclusions,
11
        GLOBAL const int2* exclusionTiles, int needEnergy,
12
#ifdef USE_CUTOFF
13
14
15
        GLOBAL const int* RESTRICT tiles, GLOBAL const unsigned int* RESTRICT interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, GLOBAL const real4* RESTRICT blockCenter,
        GLOBAL const real4* RESTRICT blockSize, GLOBAL const int* RESTRICT interactingAtoms
16
#else
17
        unsigned int numTiles
18
#endif
19
        PARAMETER_ARGUMENTS) {
20
    mixed energy = 0;
21
    INIT_PARAM_DERIVS
22
23
24
    LOCAL real3 local_pos[LOCAL_BUFFER_SIZE];
    LOCAL real3 local_force[LOCAL_BUFFER_SIZE];
    ATOM_PARAMETER_DATA
25

26
27
    // First loop: process tiles that contain exclusions.
    
28
29
    const int firstExclusionTile = FIRST_EXCLUSION_TILE+GROUP_ID*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/NUM_GROUPS;
    const int lastExclusionTile = FIRST_EXCLUSION_TILE+(GROUP_ID+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/NUM_GROUPS;
30
    for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
31
        const int2 tileIndices = exclusionTiles[pos];
32
33
        const unsigned int x = tileIndices.x;
        const unsigned int y = tileIndices.y;
34

35
        // Load the data for this tile.
36

37
38
        for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
            unsigned int j = y*TILE_SIZE + localAtomIndex;
39
            local_pos[localAtomIndex] = trimTo3(posq[j]);
40
            LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
41
42
43
44
45
46
        }
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
47
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
48
49
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
50
                real4 force = 0;
51
                DECLARE_ATOM1_DERIVATIVES
52
                real3 pos1 = trimTo3(posq[atom1]);
53
54
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
55
56
                    real3 pos2 = local_pos[j];
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
57
#ifdef USE_PERIODIC
58
                    APPLY_PERIODIC_TO_DELTA(delta)
59
#endif
60
                    real r2 = dot(delta, delta);
61
62
63
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
64
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
65
                        real r = r2*invR;
66
67
68
69
70
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        real dEdR = 0;
                        real tempEnergy = 0;
71
                        const real interactionScale = 0.5f;
72
73
74
75
76
77
78
79
#ifdef USE_EXCLUSIONS
                        bool isExcluded = !(excl & 0x1);
#endif
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
                            COMPUTE_INTERACTION
                            dEdR /= -r;
                        }
                        energy += 0.5f*tempEnergy;
80
81
82
83
                        delta *= dEdR;
                        force.x -= delta.x;
                        force.y -= delta.y;
                        force.z -= delta.z;
84
85
86
87
88
89
90
91
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }

92
                // Write results.
93

94
                unsigned int offset = atom1;
95
96
97
                ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
98
99
100
101
102
103
104
                STORE_DERIVATIVES_1
            }
        }
        else {
            // This is an off-diagonal tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
105
                local_force[localAtomIndex] = 0;
106
107
                CLEAR_LOCAL_DERIVATIVES
            }
108
109
110
111
112
113
114
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[pos*TILE_SIZE+tgx];
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
                real4 force = 0;
                DECLARE_ATOM1_DERIVATIVES
115
                real3 pos1 = trimTo3(posq[atom1]);
116
117
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
118
119
                    real3 pos2 = local_pos[j];
                    real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
120
#ifdef USE_PERIODIC
121
                    APPLY_PERIODIC_TO_DELTA(delta)
122
#endif
123
124
125
126
127
                    real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
                        real invR = RSQRT(r2);
peastman's avatar
peastman committed
128
                        real r = r2*invR;
129
130
131
132
133
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        real dEdR = 0;
                        real tempEnergy = 0;
134
                        const real interactionScale = 1.0f;
135
136
137
138
139
140
141
142
#ifdef USE_EXCLUSIONS
                        bool isExcluded = (atom1 >= NUM_ATOMS || atom2 >= NUM_ATOMS || !(excl & 0x1));
                        if (!isExcluded) {
#else
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
                            COMPUTE_INTERACTION
                            dEdR /= -r;
143
                        }
144
                        energy += tempEnergy;
145
146
147
148
                        delta *= dEdR;
                        force.x -= delta.x;
                        force.y -= delta.y;
                        force.z -= delta.z;
149
150
151
152
153
154
155
156
157
158
159
160
161
162
                        atom2 = j;
                        local_force[atom2].xyz += delta.xyz;
                        RECORD_DERIVATIVE_2
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }

                // Write results for atom1.

                unsigned int offset = atom1;
163
164
165
                ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
                ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
166
167
168
169
170
171
172
                STORE_DERIVATIVES_1
            }

            // Write results.

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int offset = y*TILE_SIZE+tgx;
173
174
175
                ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(local_force[tgx].x));
                ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
                ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
176
177
178
179
                STORE_DERIVATIVES_2
            }
        }
    }
180

181
182
    // Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
    // of them (no cutoff).
183

184
185
#ifdef USE_CUTOFF
    const unsigned int numTiles = interactionCount[0];
186
187
    if (numTiles > maxTiles)
        return; // There wasn't enough memory for the neighbor list.
188
189
    int pos = (int) (GROUP_ID*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : numTiles)/NUM_GROUPS);
    int end = (int) ((GROUP_ID+1)*(numTiles > maxTiles ? NUM_BLOCKS*((mm_long)NUM_BLOCKS+1)/2 : numTiles)/NUM_GROUPS);
190
#else
191
192
    int pos = (int) (GROUP_ID*(mm_long)numTiles/NUM_GROUPS);
    int end = (int) ((GROUP_ID+1)*(mm_long)numTiles/NUM_GROUPS);
193
194
195
#endif
    int nextToSkip = -1;
    int currentSkipIndex = 0;
196
    LOCAL int atomIndices[TILE_SIZE];
197
198
199
200
201
202
203

    while (pos < end) {
        const bool isExcluded = false;
        bool includeTile = true;
        
        // Extract the coordinates of this tile.
        
204
        int x, y;
205
206
        bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
207
208
209
210
211
212
213
214
215
216
        x = tiles[pos];
        real4 blockSizeX = blockSize[x];
        singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
                              0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
                              0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
#else
        y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
        x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
        if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
            y += (x < y ? -1 : 1);
217
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
218
        }
219

220
        // Skip over tiles that have exclusions, since they were already processed.
221

222
223
        while (nextToSkip < pos) {
            if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
224
                int2 tile = exclusionTiles[currentSkipIndex++];
225
                nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
226
            }
227
228
            else
                nextToSkip = end;
229
        }
230
231
        includeTile = (nextToSkip != pos);
#endif
232
233
234
235
236
        if (includeTile) {
            // Load the data for this tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
237
                unsigned int j = interactingAtoms[pos*TILE_SIZE+localAtomIndex];
238
239
240
241
242
#else
                unsigned int j = y*TILE_SIZE+localAtomIndex;
#endif
                atomIndices[localAtomIndex] = j;
                if (j < PADDED_NUM_ATOMS) {
243
                    local_pos[localAtomIndex] = trimTo3(posq[j]);
244
245
246
247
248
249
250
251
252
253
254
255
                    LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
                    local_force[localAtomIndex] = 0;
                    CLEAR_LOCAL_DERIVATIVES
                }
            }
#ifdef USE_PERIODIC
            if (singlePeriodicCopy) {
                // The box is small enough that we can just translate all the atoms into a single periodic
                // box, then skip having to apply periodic boundary conditions later.

                real4 blockCenterX = blockCenter[x];
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
256
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(local_pos[tgx], blockCenterX)
257
258
259
260
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    real4 force = 0;
                    DECLARE_ATOM1_DERIVATIVES
261
262
                    real3 pos1 = trimTo3(posq[atom1]);
                    APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
263
264
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
265
266
                        real3 pos2 = local_pos[j];
                        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
267
268
269
                        real r2 = dot(delta.xyz, delta.xyz);
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
270
                            real r = r2*invR;
271
272
273
274
275
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
                            real dEdR = 0;
                            real tempEnergy = 0;
276
                            const real interactionScale = 1.0f;
277
278
279
                            COMPUTE_INTERACTION
                            dEdR /= -r;
                            energy += tempEnergy;
280
281
282
283
                            delta *= dEdR;
                            force.x -= delta.x;
                            force.y -= delta.y;
                            force.z -= delta.z;
284
285
286
287
                            atom2 = j;
                            local_force[atom2].xyz += delta.xyz;
                            RECORD_DERIVATIVE_2
                        }
288
                    }
289
290
291
292

                    // Write results for atom1.

                    unsigned int offset = atom1;
293
294
295
                    ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
296
                    STORE_DERIVATIVES_1
297
298
299
300
301
                }
            }
            else
#endif
            {
302
                // We need to apply periodic boundary conditions separately for each interaction.
303
304
305

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
306
                    real4 force = 0;
307
                    DECLARE_ATOM1_DERIVATIVES
308
                    real3 pos1 = trimTo3(posq[atom1]);
309
310
                    LOAD_ATOM1_PARAMETERS
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
311
312
                        real3 pos2 = local_pos[j];
                        real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
313
#ifdef USE_PERIODIC
314
                        APPLY_PERIODIC_TO_DELTA(delta)
315
#endif
316
                        real r2 = dot(delta.xyz, delta.xyz);
317
#ifdef USE_CUTOFF
318
319
320
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS && r2 < CUTOFF_SQUARED) {
#else
                        if (atom1 < NUM_ATOMS && atomIndices[j] < NUM_ATOMS) {
321
#endif
322
                            real invR = RSQRT(r2);
peastman's avatar
peastman committed
323
                            real r = r2*invR;
324
325
326
327
328
                            unsigned int atom2 = j;
                            LOAD_ATOM2_PARAMETERS
                            atom2 = atomIndices[j];
                            real dEdR = 0;
                            real tempEnergy = 0;
329
                            const real interactionScale = 1.0f;
330
331
                            COMPUTE_INTERACTION
                            dEdR /= -r;
332
                            energy += tempEnergy;
333
334
335
336
                            delta *= dEdR;
                            force.x -= delta.x;
                            force.y -= delta.y;
                            force.z -= delta.z;
337
                            atom2 = j;
338
                            local_force[atom2] += delta;
339
                            RECORD_DERIVATIVE_2
340
341
342
343
344
                        }
                    }

                    // Write results for atom1.

345
                    unsigned int offset = atom1;
346
347
348
                    ATOMIC_ADD(&forceBuffers[offset], (mm_ulong) realToFixedPoint(force.x));
                    ATOMIC_ADD(&forceBuffers[offset+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.y));
                    ATOMIC_ADD(&forceBuffers[offset+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force.z));
349
                    STORE_DERIVATIVES_1
350
351
352
                }
            }

353
            // Write results.
354
355

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
356
357
358
359
360
361
#ifdef USE_CUTOFF
                unsigned int atom2 = atomIndices[tgx];
#else
                unsigned int atom2 = y*TILE_SIZE + tgx;
#endif
                if (atom2 < PADDED_NUM_ATOMS) {
362
363
364
                    ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) realToFixedPoint(local_force[tgx].x));
                    ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].y));
                    ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_force[tgx].z));
365
366
367
                    unsigned int offset = atom2;
                    STORE_DERIVATIVES_2
                }
368
369
370
371
            }
        }
        pos++;
    }
372
    energyBuffer[GLOBAL_ID] += energy;
373
    SAVE_PARAM_DERIVS
374
}