customNonbondedGroups.cl 8.61 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif

typedef struct {
    real x, y, z;
    real q;
    real fx, fy, fz;
    ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
    real padding;
#endif
} AtomData;

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
/**
 * This function is used on devices that don't support 64 bit atomics.  Multiple threads within
 * a single tile might have computed forces on the same atom.  This loops over them and makes sure
 * that only one thread updates the force on any given atom.
 */
void writeForces(__global real4* forceBuffers,__local AtomData* localData, int atomIndex) {
    localData[get_local_id(0)].x = atomIndex;
    SYNC_WARPS;
    real4 forceSum = (real4) 0;
    int start = (get_local_id(0)/TILE_SIZE)*TILE_SIZE;
    int end = start+32;
    bool isFirst = true;
    for (int i = start; i < end; i++)
        if (localData[i].x == atomIndex) {
            forceSum += (real4) (localData[i].fx, localData[i].fy, localData[i].fz, 0);
            isFirst &= (i >= get_local_id(0));
        }
    const unsigned int warp = get_global_id(0)/TILE_SIZE;
    unsigned int offset = atomIndex + warp*PADDED_NUM_ATOMS;
    if (isFirst)
        forceBuffers[offset] += forceSum;
    SYNC_WARPS;
}

39
__kernel void computeInteractionGroups(
40
41
42
43
44
#ifdef SUPPORTS_64_BIT_ATOMICS
        __global long* restrict forceBuffers,
#else
        __global real4* restrict forceBuffers,
#endif
45
        __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
46
        __global int* restrict numGroupTiles, int useNeighborList,
47
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
48
49
50
51
52
        PARAMETER_ARGUMENTS) {
    const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
    const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = get_local_id(0) - tgx;           // block warpIndex
53
    mixed energy = 0;
54
    INIT_DERIVATIVES
55
    __local AtomData localData[LOCAL_MEMORY_SIZE];
56

57
58
    const unsigned int startTile = (useNeighborList ? warp*numGroupTiles[0]/totalWarps : FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps);
    const unsigned int endTile = (useNeighborList ? (warp+1)*numGroupTiles[0]/totalWarps : FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps);
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
    for (int tile = startTile; tile < endTile; tile++) {
        const int4 atomData = groupData[TILE_SIZE*tile+tgx];
        const int atom1 = atomData.x;
        const int atom2 = atomData.y;
        const int rangeStart = atomData.z&0xFFFF;
        const int rangeEnd = (atomData.z>>16)&0xFFFF;
        const int exclusions = atomData.w;
        real4 posq1 = posq[atom1];
        LOAD_ATOM1_PARAMETERS
        real4 force = (real4) (0);
        real4 posq2 = posq[atom2];
        localData[get_local_id(0)].x = posq2.x;
        localData[get_local_id(0)].y = posq2.y;
        localData[get_local_id(0)].z = posq2.z;
        localData[get_local_id(0)].q = posq2.w;
        LOAD_LOCAL_PARAMETERS
        localData[get_local_id(0)].fx = 0.0f;
        localData[get_local_id(0)].fy = 0.0f;
        localData[get_local_id(0)].fz = 0.0f;
        int tj = tgx;
        SYNC_WARPS;
        for (int j = rangeStart; j < rangeEnd; j++) {
peastman's avatar
peastman committed
81
82
83
84
85
            if (tj < rangeEnd) {
                bool isExcluded = (((exclusions>>tj)&1) == 0);
                int localIndex = tbx+tj;
                posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
                real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
86
#ifdef USE_PERIODIC
87
                APPLY_PERIODIC_TO_DELTA(delta)
88
#endif
peastman's avatar
peastman committed
89
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
90
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
91
                if (!isExcluded && r2 < CUTOFF_SQUARED) {
92
#endif
peastman's avatar
peastman committed
93
                    real invR = RSQRT(r2);
peastman's avatar
peastman committed
94
                    real r = r2*invR;
peastman's avatar
peastman committed
95
96
97
                    LOAD_ATOM2_PARAMETERS
                    real dEdR = 0.0f;
                    real tempEnergy = 0.0f;
98
                    const real interactionScale = 1.0f;
peastman's avatar
peastman committed
99
100
101
102
103
104
105
                    COMPUTE_INTERACTION
                    energy += tempEnergy;
                    delta *= dEdR;
                    force.xyz -= delta.xyz;
                    localData[localIndex].fx += delta.x;
                    localData[localIndex].fy += delta.y;
                    localData[localIndex].fz += delta.z;
106
#ifdef USE_CUTOFF
peastman's avatar
peastman committed
107
                }
108
#endif
peastman's avatar
peastman committed
109
            }
110
111
112
            tj = (tj == rangeEnd-1 ? rangeStart : tj+1);
            SYNC_WARPS;
        }
113
#ifdef SUPPORTS_64_BIT_ATOMICS
114
115
116
117
118
        if (exclusions != 0) {
            atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000));
            atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
            atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
        }
peastman's avatar
peastman committed
119
120
121
        atom_add(&forceBuffers[atom2], (long) (localData[get_local_id(0)].fx*0x100000000));
        atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
        atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
122
123
124
125
126
127
128
#else
        writeForces(forceBuffers, localData, atom2);
        localData[get_local_id(0)].fx = force.x;
        localData[get_local_id(0)].fy = force.y;
        localData[get_local_id(0)].fz = force.z;
        writeForces(forceBuffers, localData, atom1);
#endif
129
130
    }
    energyBuffer[get_global_id(0)] += energy;
131
    SAVE_DERIVATIVES
132
}
133
134
135
136
137
138
139
140
141
142
143
144
145
146

/**
 * If the neighbor list needs to be rebuilt, reset the number of tiles to 0.  This is
 * executed by a single thread.
 */
__kernel void prepareToBuildNeighborList(__global int* restrict rebuildNeighborList, __global int* restrict numGroupTiles) {
    if (rebuildNeighborList[0] == 1)
        numGroupTiles[0] = 0;
}

/**
 * Filter the list of tiles to include only ones that have interactions within the
 * padded cutoff.
 */
peastman's avatar
peastman committed
147
__kernel void buildNeighborList(__global int* restrict rebuildNeighborList, __global int* restrict numGroupTiles,
148
149
150
151
152
153
154
155
156
157
158
159
160
161
        __global const real4* restrict posq, __global const int4* restrict groupData, __global int4* restrict filteredGroupData,
        real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    
    // If the neighbor list doesn't need to be rebuilt on this step, return immediately.
    
    if (rebuildNeighborList[0] == 0)
        return;

    const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
    const unsigned int local_warp = get_local_id(0)/TILE_SIZE; // local warpIndex
    const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = get_local_id(0) - tgx;           // block warpIndex
    __local real4 localPos[LOCAL_MEMORY_SIZE];
162
163
    __local volatile bool anyInteraction[WARPS_IN_BLOCK];
    __local volatile int tileIndex[WARPS_IN_BLOCK];
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

    const unsigned int startTile = warp*NUM_TILES/totalWarps;
    const unsigned int endTile = (warp+1)*NUM_TILES/totalWarps;
    for (int tile = startTile; tile < endTile; tile++) {
        const int4 atomData = groupData[TILE_SIZE*tile+tgx];
        const int atom1 = atomData.x;
        const int atom2 = atomData.y;
        const int rangeStart = atomData.z&0xFFFF;
        const int rangeEnd = (atomData.z>>16)&0xFFFF;
        const int exclusions = atomData.w;
        real4 posq1 = posq[atom1];
        localPos[get_local_id(0)] = posq[atom2];
        if (tgx == 0)
            anyInteraction[local_warp] = false;
        int tj = tgx;
        SYNC_WARPS;
        for (int j = rangeStart; j < rangeEnd && !anyInteraction[local_warp]; j++) {
            if (tj < rangeEnd) {
                bool isExcluded = (((exclusions>>tj)&1) == 0);
                int localIndex = tbx+tj;
                real4 delta = (real4) (localPos[localIndex].xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
                APPLY_PERIODIC_TO_DELTA(delta)
#endif
                real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                if (!isExcluded && r2 < PADDED_CUTOFF_SQUARED)
                    anyInteraction[local_warp] = true;
            }
            tj = (tj == rangeEnd-1 ? rangeStart : tj+1);
            SYNC_WARPS;
        }
        if (anyInteraction[local_warp]) {
peastman's avatar
peastman committed
196
            SYNC_WARPS;
197
            if (tgx == 0)
peastman's avatar
peastman committed
198
                tileIndex[local_warp] = atomic_add(numGroupTiles, 1);
199
200
201
202
203
            SYNC_WARPS;
            filteredGroupData[TILE_SIZE*tileIndex[local_warp]+tgx] = atomData;
        }
    }
}