customHbondForce.cu 11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/**
 * Convert a real4 to a real3 by removing its last element.
 */
inline __device__ real3 trim(real4 v) {
    return make_real3(v.x, v.y, v.z);
}

/**
 * This does nothing, and just exists to simply the code generation.
 */
inline __device__ real3 trim(real3 v) {
    return v;
}

/**
 * Compute the difference between two vectors, setting the fourth component to the squared magnitude.
 */
inline __device__ real4 delta(real4 vec1, real4 vec2) {
    real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
    result.w = result.x*result.x + result.y*result.y + result.z*result.z;
    return result;
}

/**
 * Compute the difference between two vectors, taking periodic boundary conditions into account
 * and setting the fourth component to the squared magnitude.
 */
28
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
29
30
    real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
31
    APPLY_PERIODIC_TO_DELTA(result)
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#endif
    result.w = result.x*result.x + result.y*result.y + result.z*result.z;
    return result;
}

/**
 * Compute the angle between two vectors.  The w component of each vector should contain the squared magnitude.
 */
inline __device__ real computeAngle(real4 vec1, real4 vec2) {
    real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
    real angle;
    if (cosine > 0.99f || cosine < -0.99f) {
        // We're close to the singularity in acos(), so take the cross product and use asin() instead.

        real3 crossProduct = cross(vec1, vec2);
        real scale = vec1.w*vec2.w;
49
        angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
50
51
52
53
        if (cosine < 0.0f)
            angle = M_PI-angle;
    }
    else
54
       angle = ACOS(cosine);
55
56
57
58
59
60
61
62
63
64
65
66
67
68
    return angle;
}

/**
 * Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
 */
inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
    real3 result = cross(vec1, vec2);
    return make_real4(result.x, result.y, result.z, result.x*result.x + result.y*result.y + result.z*result.z);
}

/**
 * Compute forces on donors.
 */
69
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
70
71
        const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
72
73
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
74
    mixed energy = 0;
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x*gridDim.x) {
        // Load information about the donor this thread will compute forces on.

        int donorIndex = donorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 d1, d2, d3;
        if (donorIndex < NUM_DONORS) {
            atoms = donorAtoms[donorIndex];
            d1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            d2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            d3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[donorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x) {
            // Load the next block of acceptors into local memory.

            int blockSize = min((int) blockDim.x, NUM_ACCEPTORS-acceptorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = acceptorAtoms[acceptorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (donorIndex < NUM_DONORS) {
                for (int index = 0; index < blockSize; index++) {
                    int acceptorIndex = acceptorStart+index;
109
#ifdef USE_EXCLUSIONS
110
111
112
113
114
115
116
117
                    if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 a1 = posBuffer[3*index];
                    real4 a2 = posBuffer[3*index+1];
                    real4 a3 = posBuffer[3*index+2];
118
                    real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }
        }

        // Write results

        if (donorIndex < NUM_DONORS) {
            if (atoms.x > -1) {
134
135
136
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0x100000000)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0x100000000)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0x100000000)));
137
138
139
                __threadfence_block();
            }
            if (atoms.y > -1) {
140
141
142
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0x100000000)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0x100000000)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0x100000000)));
143
144
145
                __threadfence_block();
            }
            if (atoms.z > -1) {
146
147
148
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0x100000000)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0x100000000)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0x100000000)));
149
150
151
152
153
154
155
156
157
                __threadfence_block();
            }
        }
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
 * Compute forces on acceptors.
 */
158
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
159
160
        const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x*gridDim.x) {
        // Load information about the acceptor this thread will compute forces on.

        int acceptorIndex = acceptorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 a1, a2, a3;
        if (acceptorIndex < NUM_ACCEPTORS) {
            atoms = acceptorAtoms[acceptorIndex];
            a1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            a2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            a3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[acceptorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x) {
            // Load the next block of donors into local memory.

            int blockSize = min((int) blockDim.x, NUM_DONORS-donorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = donorAtoms[donorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (acceptorIndex < NUM_ACCEPTORS) {
                for (int index = 0; index < blockSize; index++) {
                    int donorIndex = donorStart+index;
197
#ifdef USE_EXCLUSIONS
198
199
200
201
202
203
204
205
                    if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 d1 = posBuffer[3*index];
                    real4 d2 = posBuffer[3*index+1];
                    real4 d3 = posBuffer[3*index+2];
206
                    real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_ACCEPTOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }
        }

        // Write results

        if (acceptorIndex < NUM_ACCEPTORS) {
            if (atoms.x > -1) {
222
223
224
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0x100000000)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0x100000000)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0x100000000)));
225
226
227
                __threadfence_block();
            }
            if (atoms.y > -1) {
228
229
230
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0x100000000)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0x100000000)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0x100000000)));
231
232
233
                __threadfence_block();
            }
            if (atoms.z > -1) {
234
235
236
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0x100000000)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0x100000000)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0x100000000)));
237
238
239
240
241
                __threadfence_block();
            }
        }
    }
}