nonbonded_nvidia.cl 8.36 KB
Newer Older
1
#define TILE_SIZE 32
2

3
4
5
6
7
8
typedef struct {
    float4 posq;
    float4 force;
    ATOM_PARAMETER_DATA
} AtomData;

9
10
11
12
/**
 * Compute nonbonded interactions.
 */

13
__kernel void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* exclusions,
14
        __global unsigned int* exclusionIndices, __local AtomData* localData, __local float4* tempBuffer, __global unsigned int* tiles,
15
#ifdef USE_CUTOFF
16
        __global unsigned int* interactionFlags, __global unsigned int* interactionCount
17
18
#else
        unsigned int numTiles
19
#endif
20
        PARAMETER_ARGUMENTS) {
21
22
23
#ifdef USE_CUTOFF
    unsigned int numTiles = interactionCount[0];
#endif
24
25
    unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    unsigned int warp = get_global_id(0)/TILE_SIZE;
26
27
28
29
30
31
    unsigned int pos = warp*numTiles/totalWarps;
    unsigned int end = (warp+1)*numTiles/totalWarps;
    float energy = 0.0f;
    unsigned int lasty = 0xFFFFFFFF;

    while (pos < end) {
32
        // Extract the coordinates of this tile
33
        unsigned int x = tiles[pos];
34
        unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
35
        bool hasExclusions = (x & 0x1);
36
37
        x = (x>>17)*TILE_SIZE;
        unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
38
        unsigned int tbx = get_local_id(0) - tgx;
39
        unsigned int atom1 = x + tgx;
40
        float4 force = 0.0f;
41
        float4 posq1 = posq[atom1];
42
        LOAD_ATOM1_PARAMETERS
43
        if (x == y) {
44
            // This tile is on the diagonal.
45

46
            localData[get_local_id(0)].posq = posq1;
47
            LOAD_LOCAL_PARAMETERS_FROM_1
48
49
            unsigned int xi = x/TILE_SIZE;
            unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
50
#ifdef USE_EXCLUSIONS
51
            unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
52
#endif
53
            for (unsigned int j = 0; j < TILE_SIZE; j++) {
54
#ifdef USE_EXCLUSIONS
55
                bool isExcluded = !(excl & 0x1);
56
#endif
57
                int atom2 = tbx+j;
58
                float4 posq2 = localData[atom2].posq;
59
                float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
60
#ifdef USE_PERIODIC
61
62
63
                delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
64
#endif
65
66
                float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
                float r = sqrt(r2);
67
                float invR = 1.0f/r;
68
69
                LOAD_ATOM2_PARAMETERS
                atom2 = y+j;
70
71
72
                float dEdR = 0.0f;
                float tempEnergy = 0.0f;
                COMPUTE_INTERACTION
73
74
                energy += 0.5f*tempEnergy;
                delta.xyz *= dEdR;
75
                force.xyz -= delta.xyz;
76
                excl >>= 1;
77
78
79
            }

            // Write results
80
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
81
            unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
82
#else
83
            unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
84
#endif
85
            forceBuffers[offset].xyz += force.xyz;
86
87
        }
        else {
88
89
            // This is an off-diagonal tile.

90
91
            if (lasty != y) {
                unsigned int j = y + tgx;
92
                localData[get_local_id(0)].posq = posq[j];
93
                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
94
            }
95
            localData[get_local_id(0)].force = 0.0f;
96
#ifdef USE_CUTOFF
97
            unsigned int flags = interactionFlags[pos];
98
            if (!hasExclusions && flags != 0xFFFFFFFF  && flags == 0) {
99
100
101
102
103
104
                if (flags == 0) {
                    // No interactions in this tile.
                }
                else {
                    // Compute only a subset of the interactions in this tile.

105
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
106
107
                        if ((flags&(1<<j)) != 0) {
                            bool isExcluded = false;
108
                            int atom2 = tbx+j;
109
                            float4 posq2 = localData[atom2].posq;
110
                            float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
111
#ifdef USE_PERIODIC
112
113
114
                            delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                            delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                            delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
115
#endif
116
                            float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
117
118
                            float invR = native_rsqrt(r2);
                            float r = native_recip(invR);
119
120
                            LOAD_ATOM2_PARAMETERS
                            atom2 = y+j;
121
122
123
                            float dEdR = 0.0f;
                            float tempEnergy = 0.0f;
                            COMPUTE_INTERACTION
124
125
			    energy += tempEnergy;
                            delta.xyz *= dEdR;
126
                            force.xyz -= delta.xyz;
127
128
                            tempBuffer[get_local_id(0)] = delta;

129
                            // Sum the forces on atom2.
130
131
132
133
134
135
136
137
138
139

                            if (tgx % 2 == 0)
                                tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+1].xyz;
                            if (tgx % 4 == 0)
                                tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+2].xyz;
                            if (tgx % 8 == 0)
                                tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+4].xyz;
                            if (tgx % 16 == 0)
                                tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+8].xyz;
                            if (tgx == 0)
140
                                localData[tbx+j].force.xyz += tempBuffer[get_local_id(0)].xyz + tempBuffer[get_local_id(0)+16].xyz;
141
142
143
144
                        }
                    }
                }
            }
145
            else
146
147
#endif
            {
148
149
                // Compute the full set of interactions in this tile.

150
151
152
                unsigned int xi = x/TILE_SIZE;
                unsigned int yi = y/TILE_SIZE;
                unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
153
#ifdef USE_EXCLUSIONS
154
                unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
155
                excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
156
#endif
157
                unsigned int tj = tgx;
158
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
159
#ifdef USE_EXCLUSIONS
160
                    bool isExcluded = !(excl & 0x1);
161
#endif
162
                    int atom2 = tbx+tj;
163
                    float4 posq2 = localData[atom2].posq;
164
                    float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
165
#ifdef USE_PERIODIC
166
167
168
                    delta.x -= floor(delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
                    delta.y -= floor(delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
                    delta.z -= floor(delta.z*INV_PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
169
#endif
170
                    float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
171
172
                    float invR = native_rsqrt(r2);
                    float r = native_recip(invR);
173
174
                    LOAD_ATOM2_PARAMETERS
                    atom2 = y+tj;
175
176
177
                    float dEdR = 0.0f;
                    float tempEnergy = 0.0f;
                    COMPUTE_INTERACTION
178
179
		    energy += tempEnergy;
                    delta.xyz *= dEdR;
180
                    force.xyz -= delta.xyz;
181
                    localData[tbx+tj].force.xyz += delta.xyz;
182
                    excl >>= 1;
183
                    tj = (tj + 1) & (TILE_SIZE - 1);
184
185
186
187
                }
            }

            // Write results
188
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
189
190
            unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
191
#else
192
193
            unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
            unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
194
#endif
195
            forceBuffers[offset1].xyz += force.xyz;
196
            forceBuffers[offset2].xyz += localData[get_local_id(0)].force.xyz;
197
198
199
200
201
202
            lasty = y;
        }
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
}