customHbondForce.cu 10.8 KB
Newer Older
1
2
3
4
5
6
7
8
/**
 * Convert a real4 to a real3 by removing its last element.
 */
inline __device__ real3 trim(real4 v) {
    return make_real3(v.x, v.y, v.z);
}

/**
Peter Eastman's avatar
Peter Eastman committed
9
 * This does nothing, and just exists to simplify the code generation.
10
11
12
13
14
15
 */
inline __device__ real3 trim(real3 v) {
    return v;
}

/**
Peter Eastman's avatar
Peter Eastman committed
16
 * Compute the difference between two vectors, optionally taking periodic boundary conditions into account
17
18
 * and setting the fourth component to the squared magnitude.
 */
Peter Eastman's avatar
Peter Eastman committed
19
inline __device__ real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
20
21
    real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
22
    APPLY_PERIODIC_TO_DELTA(result)
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#endif
    result.w = result.x*result.x + result.y*result.y + result.z*result.z;
    return result;
}

/**
 * Compute the angle between two vectors.  The w component of each vector should contain the squared magnitude.
 */
inline __device__ real computeAngle(real4 vec1, real4 vec2) {
    real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
    real angle;
    if (cosine > 0.99f || cosine < -0.99f) {
        // We're close to the singularity in acos(), so take the cross product and use asin() instead.

        real3 crossProduct = cross(vec1, vec2);
        real scale = vec1.w*vec2.w;
40
        angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
41
42
43
44
        if (cosine < 0.0f)
            angle = M_PI-angle;
    }
    else
45
       angle = ACOS(cosine);
46
47
48
49
50
51
52
53
54
55
56
57
58
59
    return angle;
}

/**
 * Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
 */
inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
    real3 result = cross(vec1, vec2);
    return make_real4(result.x, result.y, result.z, result.x*result.x + result.y*result.y + result.z*result.z);
}

/**
 * Compute forces on donors.
 */
60
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
61
62
        const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
63
64
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
65
    mixed energy = 0;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x*gridDim.x) {
        // Load information about the donor this thread will compute forces on.

        int donorIndex = donorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 d1, d2, d3;
        if (donorIndex < NUM_DONORS) {
            atoms = donorAtoms[donorIndex];
            d1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            d2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            d3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[donorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x) {
            // Load the next block of acceptors into local memory.

Peter Eastman's avatar
Peter Eastman committed
89
            __syncthreads();
90
91
92
93
94
95
96
97
98
99
100
            int blockSize = min((int) blockDim.x, NUM_ACCEPTORS-acceptorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = acceptorAtoms[acceptorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (donorIndex < NUM_DONORS) {
                for (int index = 0; index < blockSize; index++) {
                    int acceptorIndex = acceptorStart+index;
101
#ifdef USE_EXCLUSIONS
102
103
104
105
106
107
108
109
                    if (acceptorIndex == exclusionIndices.x || acceptorIndex == exclusionIndices.y || acceptorIndex == exclusionIndices.z || acceptorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 a1 = posBuffer[3*index];
                    real4 a2 = posBuffer[3*index+1];
                    real4 a3 = posBuffer[3*index+2];
Peter Eastman's avatar
Peter Eastman committed
110
                    real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_DONOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }
        }

        // Write results

        if (donorIndex < NUM_DONORS) {
            if (atoms.x > -1) {
126
127
128
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0x100000000)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0x100000000)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0x100000000)));
129
130
131
                __threadfence_block();
            }
            if (atoms.y > -1) {
132
133
134
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0x100000000)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0x100000000)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0x100000000)));
135
136
137
                __threadfence_block();
            }
            if (atoms.z > -1) {
138
139
140
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0x100000000)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0x100000000)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0x100000000)));
141
142
143
144
145
146
147
148
149
                __threadfence_block();
            }
        }
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
 * Compute forces on acceptors.
 */
150
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
151
152
        const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
        PARAMETER_ARGUMENTS) {
    extern __shared__ real4 posBuffer[];
    real3 f1 = make_real3(0);
    real3 f2 = make_real3(0);
    real3 f3 = make_real3(0);
    for (int acceptorStart = 0; acceptorStart < NUM_ACCEPTORS; acceptorStart += blockDim.x*gridDim.x) {
        // Load information about the acceptor this thread will compute forces on.

        int acceptorIndex = acceptorStart+blockIdx.x*blockDim.x+threadIdx.x;
        int4 atoms, exclusionIndices;
        real4 a1, a2, a3;
        if (acceptorIndex < NUM_ACCEPTORS) {
            atoms = acceptorAtoms[acceptorIndex];
            a1 = (atoms.x > -1 ? posq[atoms.x] : make_real4(0));
            a2 = (atoms.y > -1 ? posq[atoms.y] : make_real4(0));
            a3 = (atoms.z > -1 ? posq[atoms.z] : make_real4(0));
#ifdef USE_EXCLUSIONS
            exclusionIndices = exclusions[acceptorIndex];
#endif
        }
        else
            atoms = make_int4(-1, -1, -1, -1);
        for (int donorStart = 0; donorStart < NUM_DONORS; donorStart += blockDim.x) {
            // Load the next block of donors into local memory.

Peter Eastman's avatar
Peter Eastman committed
178
            __syncthreads();
179
180
181
182
183
184
185
186
187
188
189
            int blockSize = min((int) blockDim.x, NUM_DONORS-donorStart);
            if (threadIdx.x < blockSize) {
                int4 atoms2 = donorAtoms[donorStart+threadIdx.x];
                posBuffer[3*threadIdx.x] = (atoms2.x > -1 ? posq[atoms2.x] : make_real4(0));
                posBuffer[3*threadIdx.x+1] = (atoms2.y > -1 ? posq[atoms2.y] : make_real4(0));
                posBuffer[3*threadIdx.x+2] = (atoms2.z > -1 ? posq[atoms2.z] : make_real4(0));
            }
            __syncthreads();
            if (acceptorIndex < NUM_ACCEPTORS) {
                for (int index = 0; index < blockSize; index++) {
                    int donorIndex = donorStart+index;
190
#ifdef USE_EXCLUSIONS
191
192
193
194
195
196
197
198
                    if (donorIndex == exclusionIndices.x || donorIndex == exclusionIndices.y || donorIndex == exclusionIndices.z || donorIndex == exclusionIndices.w)
                        continue;
#endif
                    // Compute the interaction between a donor and an acceptor.

                    real4 d1 = posBuffer[3*index];
                    real4 d2 = posBuffer[3*index+1];
                    real4 d3 = posBuffer[3*index+2];
Peter Eastman's avatar
Peter Eastman committed
199
                    real4 deltaD1A1 = delta(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#ifdef USE_CUTOFF
                    if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
                        COMPUTE_ACCEPTOR_FORCE
#ifdef USE_CUTOFF
                    }
#endif
                }
            }
        }

        // Write results

        if (acceptorIndex < NUM_ACCEPTORS) {
            if (atoms.x > -1) {
215
216
217
                atomicAdd(&force[atoms.x], static_cast<unsigned long long>((long long) (f1.x*0x100000000)));
                atomicAdd(&force[atoms.x+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.y*0x100000000)));
                atomicAdd(&force[atoms.x+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f1.z*0x100000000)));
218
219
220
                __threadfence_block();
            }
            if (atoms.y > -1) {
221
222
223
                atomicAdd(&force[atoms.y], static_cast<unsigned long long>((long long) (f2.x*0x100000000)));
                atomicAdd(&force[atoms.y+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.y*0x100000000)));
                atomicAdd(&force[atoms.y+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f2.z*0x100000000)));
224
225
226
                __threadfence_block();
            }
            if (atoms.z > -1) {
227
228
229
                atomicAdd(&force[atoms.z], static_cast<unsigned long long>((long long) (f3.x*0x100000000)));
                atomicAdd(&force[atoms.z+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.y*0x100000000)));
                atomicAdd(&force[atoms.z+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (f3.z*0x100000000)));
230
231
232
233
234
                __threadfence_block();
            }
        }
    }
}