settle.cl 8.96 KB
Newer Older
1
2
3
4
/**
 * Enforce constraints on SETTLE clusters
 */

5
__kernel void applySettle(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const float4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
6
7
8
9
10
11
12
13
14
15
16
17
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.

        int4 atoms = clusterAtoms[index];
        float2 params = clusterParams[index];
        float4 apos0 = oldPos[atoms.x];
        float4 xp0 = posDelta[atoms.x];
        float4 apos1 = oldPos[atoms.y];
        float4 xp1 = posDelta[atoms.y];
        float4 apos2 = oldPos[atoms.z];
        float4 xp2 = posDelta[atoms.z];
Peter Eastman's avatar
Peter Eastman committed
18
19
20
        float m0 = RECIP(velm[atoms.x].w);
        float m1 = RECIP(velm[atoms.y].w);
        float m2 = RECIP(velm[atoms.z].w);
21
22
23
24
25
26
27
28
29
30

        // Apply the SETTLE algorithm.

        float xb0 = apos1.x-apos0.x;
        float yb0 = apos1.y-apos0.y;
        float zb0 = apos1.z-apos0.z;
        float xc0 = apos2.x-apos0.x;
        float yc0 = apos2.y-apos0.y;
        float zc0 = apos2.z-apos0.z;

Peter Eastman's avatar
Peter Eastman committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
        float invTotalMass = 1.0f/(m0+m1+m2);
        float xcom = (xp0.x*m0 + (xb0+xp1.x)*m1 + (xc0+xp2.x)*m2) * invTotalMass;
        float ycom = (xp0.y*m0 + (yb0+xp1.y)*m1 + (yc0+xp2.y)*m2) * invTotalMass;
        float zcom = (xp0.z*m0 + (zb0+xp1.z)*m1 + (zc0+xp2.z)*m2) * invTotalMass;

        float xa1 = xp0.x - xcom;
        float ya1 = xp0.y - ycom;
        float za1 = xp0.z - zcom;
        float xb1 = xb0 + xp1.x - xcom;
        float yb1 = yb0 + xp1.y - ycom;
        float zb1 = zb0 + xp1.z - zcom;
        float xc1 = xc0 + xp2.x - xcom;
        float yc1 = yc0 + xp2.y - ycom;
        float zc1 = zc0 + xp2.z - zcom;
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

        float xaksZd = yb0*zc0 - zb0*yc0;
        float yaksZd = zb0*xc0 - xb0*zc0;
        float zaksZd = xb0*yc0 - yb0*xc0;
        float xaksXd = ya1*zaksZd - za1*yaksZd;
        float yaksXd = za1*xaksZd - xa1*zaksZd;
        float zaksXd = xa1*yaksZd - ya1*xaksZd;
        float xaksYd = yaksZd*zaksXd - zaksZd*yaksXd;
        float yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
        float zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;

        float axlng = sqrt(xaksXd*xaksXd + yaksXd*yaksXd + zaksXd*zaksXd);
        float aylng = sqrt(xaksYd*xaksYd + yaksYd*yaksYd + zaksYd*zaksYd);
        float azlng = sqrt(xaksZd*xaksZd + yaksZd*yaksZd + zaksZd*zaksZd);
        float trns11 = xaksXd / axlng;
        float trns21 = yaksXd / axlng;
        float trns31 = zaksXd / axlng;
        float trns12 = xaksYd / aylng;
        float trns22 = yaksYd / aylng;
        float trns32 = zaksYd / aylng;
        float trns13 = xaksZd / azlng;
        float trns23 = yaksZd / azlng;
        float trns33 = zaksZd / azlng;

        float xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0;
        float yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0;
        float xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0;
        float yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0;
        float za1d = trns13*xa1 + trns23*ya1 + trns33*za1;
        float xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1;
        float yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1;
        float zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1;
        float xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1;
        float yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1;
        float zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1;

        //                                        --- Step2  A2' ---

        float rc = 0.5*params.y;
        float rb = sqrt(params.x*params.x-rc*rc);
Peter Eastman's avatar
Peter Eastman committed
85
        float ra = rb*(m1+m2)*invTotalMass;
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
        rb -= ra;
        float sinphi = za1d / ra;
        float cosphi = sqrt(1.0f - sinphi*sinphi);
        float sinpsi = (zb1d - zc1d) / (2*rc*cosphi);
        float cospsi = sqrt(1.0f - sinpsi*sinpsi);

        float ya2d =   ra*cosphi;
        float xb2d = - rc*cospsi;
        float yb2d = - rb*cosphi - rc*sinpsi*sinphi;
        float yc2d = - rb*cosphi + rc*sinpsi*sinphi;
        float xb2d2 = xb2d*xb2d;
        float hh2 = 4.0f*xb2d2 + (yb2d-yc2d)*(yb2d-yc2d) + (zb1d-zc1d)*(zb1d-zc1d);
        float deltx = 2.0f*xb2d + sqrt(4.0f*xb2d2 - hh2 + params.y*params.y);
        xb2d -= deltx*0.5;

        //                                        --- Step3  al,be,ga ---

        float alpha = (xb2d*(xb0d-xc0d) + yb0d*yb2d + yc0d*yc2d);
        float beta = (xb2d*(yc0d-yb0d) + xb0d*yb2d + xc0d*yc2d);
        float gamma = xb0d*yb1d - xb1d*yb0d + xc0d*yc1d - xc1d*yc0d;

        float al2be2 = alpha*alpha + beta*beta;
Peter Eastman's avatar
Peter Eastman committed
108
        float sintheta = (alpha*gamma - beta*sqrt(al2be2 - gamma*gamma)) / al2be2;
109
110
111

        //                                        --- Step4  A3' ---

Peter Eastman's avatar
Peter Eastman committed
112
        float costheta = sqrt(1.0f - sintheta*sintheta);
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
        float xa3d = - ya2d*sintheta;
        float ya3d =   ya2d*costheta;
        float za3d = za1d;
        float xb3d =   xb2d*costheta - yb2d*sintheta;
        float yb3d =   xb2d*sintheta + yb2d*costheta;
        float zb3d = zb1d;
        float xc3d = - xb2d*costheta - yc2d*sintheta;
        float yc3d = - xb2d*sintheta + yc2d*costheta;
        float zc3d = zc1d;

        //                                        --- Step5  A3 ---

        float xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d;
        float ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d;
        float za3 = trns31*xa3d + trns32*ya3d + trns33*za3d;
        float xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d;
        float yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d;
        float zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d;
        float xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d;
        float yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d;
        float zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d;

Peter Eastman's avatar
Peter Eastman committed
135
136
137
138
139
140
141
142
143
        xp0.x = xcom + xa3;
        xp0.y = ycom + ya3;
        xp0.z = zcom + za3;
        xp1.x = xcom + xb3 - xb0;
        xp1.y = ycom + yb3 - yb0;
        xp1.z = zcom + zb3 - zb0;
        xp2.x = xcom + xc3 - xc0;
        xp2.y = ycom + yc3 - yc0;
        xp2.z = zcom + zc3 - zc0;
144
145
146

        // Record the new positions.

147
148
149
        posDelta[atoms.x] = xp0;
        posDelta[atoms.y] = xp1;
        posDelta[atoms.z] = xp2;
150
151
152
        index += get_global_size(0);
    }
}
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169

/**
 * Enforce velocity constraints on SETTLE clusters
 */

__kernel void constrainVelocities(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global float4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
    for (int index = get_global_id(0); index < numClusters; index++) {
        // Load the data for this cluster.

        int4 atoms = clusterAtoms[index];
        float4 apos0 = oldPos[atoms.x];
        float4 apos1 = oldPos[atoms.y];
        float4 apos2 = oldPos[atoms.z];
        float4 v0 = velm[atoms.x];
        float4 v1 = velm[atoms.y];
        float4 v2 = velm[atoms.z];
        
170
171
        // Compute intermediate quantities: the atom masses, the bond directions, the relative velocities,
        // and the angle cosines and sines.
172
        
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
        float mA = RECIP(v0.w);
        float mB = RECIP(v1.w);
        float mC = RECIP(v2.w);
        float4 eAB = apos1-apos0;
        float4 eBC = apos2-apos1;
        float4 eCA = apos0-apos2;
        eAB.xyz /= SQRT(eAB.x*eAB.x + eAB.y*eAB.y + eAB.z*eAB.z);
        eBC.xyz /= SQRT(eBC.x*eBC.x + eBC.y*eBC.y + eBC.z*eBC.z);
        eCA.xyz /= SQRT(eCA.x*eCA.x + eCA.y*eCA.y + eCA.z*eCA.z);
        float vAB = (v1.x-v0.x)*eAB.x + (v1.y-v0.y)*eAB.y + (v1.z-v0.z)*eAB.z;
        float vBC = (v2.x-v1.x)*eBC.x + (v2.y-v1.y)*eBC.y + (v2.z-v1.z)*eBC.z;
        float vCA = (v0.x-v2.x)*eCA.x + (v0.y-v2.y)*eCA.y + (v0.z-v2.z)*eCA.z;
        float cA = -(eAB.x*eCA.x + eAB.y*eCA.y + eAB.z*eCA.z);
        float cB = -(eAB.x*eBC.x + eAB.y*eBC.y + eAB.z*eBC.z);
        float cC = -(eBC.x*eCA.x + eBC.y*eCA.y + eBC.z*eCA.z);
        float s2A = 1-cA*cA;
        float s2B = 1-cB*cB;
        float s2C = 1-cC*cC;
191
        
192
193
194
        // Solve the equations.  These are different from those in the SETTLE paper (JCC 13(8), pp. 952-962, 1992), because
        // in going from equations B1 to B2, they make the assumption that mB=mC (but don't bother to mention they're
        // making that assumption).  We allow all three atoms to have different masses.
195
        
196
        float mABC = mA*mB*mC;
Peter Eastman's avatar
Peter Eastman committed
197
198
199
200
201
        float mABCinv = 1.0f/mABC;
        float denom = (((s2A*mB+s2B*mA)*mC+(s2A*mB*mB+2*(cA*cB*cC+1)*mA*mB+s2B*mA*mA))*mC+s2C*mA*mB*(mA+mB))*mABCinv;
        float tab = ((cB*cC*mA-cA*mB-cA*mC)*vCA + (cA*cC*mB-cB*mC-cB*mA)*vBC + (mABC+s2C*mA*mA*mB*mB*mABCinv)*vAB)/denom;
        float tbc = ((cA*cB*mC-cC*mB-cC*mA)*vCA + ((s2A*mB*mB)*mC*mC*mABCinv+mABC)*vBC + (cA*cC*mB-cB*mA-cB*mC)*vAB)/denom;
        float tca = (((s2B*mA*mA)*mC*mC*mABCinv+mABC)*vCA + (cA*cB*mC-cC*mB-cC*mA)*vBC + (cB*cC*mA-cA*mB-cA*mC)*vAB)/denom;
202
203
204
        v0.xyz += (tab*eAB.xyz - tca*eCA.xyz)*v0.w;
        v1.xyz += (tbc*eBC.xyz - tab*eAB.xyz)*v1.w;
        v2.xyz += (tca*eCA.xyz - tbc*eBC.xyz)*v2.w;
205
206
207
208
209
        velm[atoms.x] = v0;
        velm[atoms.y] = v1;
        velm[atoms.z] = v2;
    }
}