"platforms/opencl/src/kernels/nonbonded_nvidia.cl" did not exist on "072fc1209f90cfe48c805c517cc93b5b6ee440c2"
customHbondForce.cc 11.9 KB
Newer Older
1
/**
Peter Eastman's avatar
Peter Eastman committed
2
 * Compute the difference between two vectors, optionally taking periodic boundary conditions into account
3
4
 * and setting the fourth component to the squared magnitude.
 */
5
6
inline DEVICE real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
7
#ifdef USE_PERIODIC
8
    APPLY_PERIODIC_TO_DELTA(result)
9
10
11
12
13
14
15
16
#endif
    result.w = result.x*result.x + result.y*result.y + result.z*result.z;
    return result;
}

/**
 * Compute the angle between two vectors.  The w component of each vector should contain the squared magnitude.
 */
17
inline DEVICE real computeAngle(real4 vec1, real4 vec2) {
18
19
20
    real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
    real angle;
21
22
23
    if (cosine > 0.99f || cosine < -0.99f) {
        // We're close to the singularity in acos(), so take the cross product and use asin() instead.

24
        real3 crossProduct = cross(trimTo3(vec1), trimTo3(vec2));
25
        real scale = vec1.w*vec2.w;
26
27
28
        angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
        if (cosine < 0)
            angle = M_PI-angle;
29
30
    }
    else
31
       angle = ACOS(cosine);
32
33
34
35
    return angle;
}

/**
36
 * Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
37
 */
38
39
40
inline DEVICE real4 computeCross(real4 vec1, real4 vec2) {
    real3 cp = cross(trimTo3(vec1), trimTo3(vec2));
    return make_real4(cp.x, cp.y, cp.z, cp.x*cp.x+cp.y*cp.y+cp.z*cp.z);
41
}
42

43
44
45
/**
 * Compute forces on donors.
 */
46
47
48
49
50
51
52
53
KERNEL void computeDonorForces(
#ifdef SUPPORTS_64_BIT_ATOMICS
	GLOBAL mm_ulong* RESTRICT force,
#else
	GLOBAL real4* RESTRICT forceBuffers, GLOBAL const int4* RESTRICT donorBufferIndices,
#endif
	GLOBAL mixed* RESTRICT energyBuffer, GLOBAL const real4* RESTRICT posq, GLOBAL const int4* RESTRICT exclusions,
        GLOBAL const int4* RESTRICT donorAtoms, GLOBAL const int4* RESTRICT acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
54
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
55
        PARAMETER_ARGUMENTS) {
56
    LOCAL real4 posBuffer[3*THREAD_BLOCK_SIZE];
57
    mixed energy = 0;
58
59
60
61
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += GLOBAL_SIZE) {
62
63
        // Load information about the donor this thread will compute forces on.

64
        int donorIndex = donorStart+GLOBAL_ID;
65
        int4 atoms, exclusionIndices;
66
        real4 d1, d2, d3;
67
68
        if (donorIndex < NUM_DONORS) {
            atoms = donorAtoms[donorIndex];
69
70
71
            d1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            d2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            d3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
72
73
74
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[donorIndex];
#endif
75
76
        }
        else
77
78
            atoms = make_int4(-1, -1, -1, -1);
        for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += LOCAL_SIZE) {
79
80
            // Load the next block of acceptors into local memory.

81
82
83
84
85
86
87
88
89
            SYNC_THREADS;
            int blockSize = min((int) LOCAL_SIZE, NUM_ACCEPTORS-acceptorStart);
            if (LOCAL_ID < blockSize) {
                int4 atoms2 = acceptorAtoms[acceptorStart+LOCAL_ID];
                posBuffer[3*LOCAL_ID] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*LOCAL_ID+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*LOCAL_ID+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            SYNC_THREADS;
90
91
            if (donorIndex < NUM_DONORS) {
                for (int index = 0; index < blockSize; index++) {
92
                    int acceptorIndex = acceptorStart+index;
93
#ifdef USE_EXCLUSIONS
94
95
96
                    if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
                        continue;
#endif
97
98
                    // Compute the interaction between a donor and an acceptor.

99
100
101
                    real4 a1 = posBuffer[3*index];
                    real4 a2 = posBuffer[3*index+1];
                    real4 a3 = posBuffer[3*index+2];
Peter Eastman's avatar
Peter Eastman committed
102
                    real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
103
#ifdef USE_CUTOFF
104
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
105
#endif
106
107
108
                        COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
                    }
109
#endif
110
111
112
                }
            }
        }
113

114
        // Write results
115

116
        if (donorIndex < NUM_DONORS) {
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#ifdef SUPPORTS_64_BIT_ATOMICS
            if (atoms.x > -1) {
                ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000)));
                MEM_FENCE;
            }
            if (atoms.y > -1) {
                ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000)));
                MEM_FENCE;
            }
            if (atoms.z > -1) {
                ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000)));
                MEM_FENCE;
            }
#else
137
138
139
            int4 bufferIndices = donorBufferIndices[donorIndex];
            if (atoms.x > -1) {
                unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
140
                real4 force = forceBuffers[offset];
141
142
143
144
145
                force.xyz += f1.xyz;
                forceBuffers[offset] = force;
            }
            if (atoms.y > -1) {
                unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS;
146
                real4 force = forceBuffers[offset];
147
148
149
150
151
                force.xyz += f2.xyz;
                forceBuffers[offset] = force;
            }
            if (atoms.z > -1) {
                unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS;
152
                real4 force = forceBuffers[offset];
153
154
155
                force.xyz += f3.xyz;
                forceBuffers[offset] = force;
            }
156
#endif
157
158
        }
    }
159
    energyBuffer[GLOBAL_ID] += energy;
160
161
162
163
}
/**
 * Compute forces on acceptors.
 */
164
165
166
167
168
169
170
171
KERNEL void computeAcceptorForces(
#ifdef SUPPORTS_64_BIT_ATOMICS
	GLOBAL mm_ulong* RESTRICT force,
#else
	GLOBAL real4* RESTRICT forceBuffers, GLOBAL const int4* RESTRICT acceptorBufferIndices,
#endif
        GLOBAL mixed* RESTRICT energyBuffer, GLOBAL const real4* RESTRICT posq, GLOBAL const int4* RESTRICT exclusions,
        GLOBAL const int4* RESTRICT donorAtoms, GLOBAL const int4* RESTRICT acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
172
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
173
        PARAMETER_ARGUMENTS) {
174
175
176
177
178
    LOCAL real4 posBuffer[3*THREAD_BLOCK_SIZE];
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += GLOBAL_SIZE) {
179
180
        // Load information about the acceptor this thread will compute forces on.

181
        int acceptorIndex = acceptorStart+GLOBAL_ID;
182
        int4 atoms, exclusionIndices;
183
        real4 a1, a2, a3;
184
185
        if (acceptorIndex < NUM_ACCEPTORS) {
            atoms = acceptorAtoms[acceptorIndex];
186
187
188
            a1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            a2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            a3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
189
190
191
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[acceptorIndex];
#endif
192
193
        }
        else
194
195
            atoms = make_int4(-1, -1, -1, -1);
        for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += LOCAL_SIZE) {
196
197
            // Load the next block of donors into local memory.

198
199
200
201
202
203
204
205
206
            SYNC_THREADS;
            int blockSize = min((int) LOCAL_SIZE, NUM_DONORS-donorStart);
            if (LOCAL_ID < blockSize) {
                int4 atoms2 = donorAtoms[donorStart+LOCAL_ID];
                posBuffer[3*LOCAL_ID] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*LOCAL_ID+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*LOCAL_ID+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            SYNC_THREADS;
207
208
            if (acceptorIndex < NUM_ACCEPTORS) {
                for (int index = 0; index < blockSize; index++) {
209
                    int donorIndex = donorStart+index;
210
#ifdef USE_EXCLUSIONS
211
212
213
                    if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
                        continue;
#endif
214
215
                    // Compute the interaction between a donor and an acceptor.

216
217
218
                    real4 d1 = posBuffer[3*index];
                    real4 d2 = posBuffer[3*index+1];
                    real4 d3 = posBuffer[3*index+2];
Peter Eastman's avatar
Peter Eastman committed
219
                    real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
220
221
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
222
#endif
223
                        COMPUTE_ACCEPTOR_FORCE
224
#ifdef USE_CUTOFF
225
                    }
226
#endif
227
                }
228
229
230
231
232
            }
        }

        // Write results

233
        if (acceptorIndex < NUM_ACCEPTORS) {
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#ifdef SUPPORTS_64_BIT_ATOMICS
            if (atoms.x > -1) {
                ATOMIC_ADD(&force[atoms.x], (mm_ulong) ((mm_long) (f1.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.x+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.x+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f1.z*0x100000000)));
                MEM_FENCE;
            }
            if (atoms.y > -1) {
                ATOMIC_ADD(&force[atoms.y], (mm_ulong) ((mm_long) (f2.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.y+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.y+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f2.z*0x100000000)));
                MEM_FENCE;
            }
            if (atoms.z > -1) {
                ATOMIC_ADD(&force[atoms.z], (mm_ulong) ((mm_long) (f3.x*0x100000000)));
                ATOMIC_ADD(&force[atoms.z+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.y*0x100000000)));
                ATOMIC_ADD(&force[atoms.z+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (f3.z*0x100000000)));
                MEM_FENCE;
            }
#else
254
255
256
            int4 bufferIndices = acceptorBufferIndices[acceptorIndex];
            if (atoms.x > -1) {
                unsigned int offset = atoms.x+bufferIndices.x*PADDED_NUM_ATOMS;
257
                real4 force = forceBuffers[offset];
258
259
260
261
262
                force.xyz += f1.xyz;
                forceBuffers[offset] = force;
            }
            if (atoms.y > -1) {
                unsigned int offset = atoms.y+bufferIndices.y*PADDED_NUM_ATOMS;
263
                real4 force = forceBuffers[offset];
264
265
266
267
268
                force.xyz += f2.xyz;
                forceBuffers[offset] = force;
            }
            if (atoms.z > -1) {
                unsigned int offset = atoms.z+bufferIndices.z*PADDED_NUM_ATOMS;
269
                real4 force = forceBuffers[offset];
270
271
272
                force.xyz += f3.xyz;
                forceBuffers[offset] = force;
            }
273
#endif
274
        }
275
276
    }
}