cmapTorsionForce.cl 6.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/**
 * Compute CNAP torsion forces.
 */

__kernel void computeCMAPTorsionForces(int numAtoms, int numTorsions, __global float4* forceBuffers, __global float* energyBuffer,
        __global float4* posq, __global float4* coeff, __global int2* mapPositions, __global int16* indices, __global int* maps) {
    const float PI = 3.14159265358979323846f;
    float energy = 0.0f;
    for (int index = get_global_id(0); index < numTorsions; index += get_global_size(0)) {
        int16 atoms = indices[index];
        float4 a1 = posq[atoms.s0];
        float4 a2 = posq[atoms.s1];
        float4 a3 = posq[atoms.s2];
        float4 a4 = posq[atoms.s3];
        float4 b1 = posq[atoms.s4];
        float4 b2 = posq[atoms.s5];
        float4 b3 = posq[atoms.s6];
        float4 b4 = posq[atoms.s7];

        // Compute the first angle.

        float4 v0a = (float4) (a1.xyz-a2.xyz, 0.0f);
        float4 v1a = (float4) (a3.xyz-a2.xyz, 0.0f);
        float4 v2a = (float4) (a3.xyz-a4.xyz, 0.0f);
        float4 cp0a = cross(v0a, v1a);
        float4 cp1a = cross(v1a, v2a);
        float cosangle = dot(normalize(cp0a), normalize(cp1a));
        float angleA;
        if (cosangle > 0.99f || cosangle < -0.99f) {
            // We're close to the singularity in acos(), so take the cross product and use asin() instead.

            float4 cross_prod = cross(cp0a, cp1a);
            float scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
            angleA = asin(sqrt(dot(cross_prod, cross_prod)/scale));
            if (cosangle < 0.0f)
                angleA = PI-angleA;
        }
        else
           angleA = acos(cosangle);
        angleA = (dot(v0a, cp1a) >= 0 ? angleA : -angleA);
        angleA = fmod(angleA+2.0f*PI, 2.0f*PI);

        // Compute the second angle.

        float4 v0b = (float4) (b1.xyz-b2.xyz, 0.0f);
        float4 v1b = (float4) (b3.xyz-b2.xyz, 0.0f);
        float4 v2b = (float4) (b3.xyz-b4.xyz, 0.0f);
        float4 cp0b = cross(v0b, v1b);
        float4 cp1b = cross(v1b, v2b);
        cosangle = dot(normalize(cp0b), normalize(cp1b));
        float angleB;
        if (cosangle > 0.99f || cosangle < -0.99f) {
            // We're close to the singularity in acos(), so take the cross product and use asin() instead.

            float4 cross_prod = cross(cp0b, cp1b);
            float scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
            angleB = asin(sqrt(dot(cross_prod, cross_prod)/scale));
            if (cosangle < 0.0f)
                angleB = PI-angleB;
        }
        else
           angleB = acos(cosangle);
        angleB = (dot(v0b, cp1b) >= 0 ? angleB : -angleB);
        angleB = fmod(angleB+2.0f*PI, 2.0f*PI);

        // Identify which patch this is in.

        int2 pos = mapPositions[maps[index]];
        int size = pos.y;
        float delta = 2*PI/size;
        int s = (int) (angleA/delta);
        int t = (int) (angleB/delta);
        float4 c[4];
        int coeffIndex = 4*(pos.x+s+size*t);
        c[0] = coeff[coeffIndex];
        c[1] = coeff[coeffIndex+1];
        c[2] = coeff[coeffIndex+2];
        c[3] = coeff[coeffIndex+3];
        float da = angleA/delta-s;
        float db = angleB/delta-t;

        // Evaluate the spline to determine the energy and gradients.

        float torsionEnergy = 0.0f;
        float dEdA = 0.0f;
        float dEdB = 0.0f;
        torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;
        dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;
        dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;
        torsionEnergy = da*torsionEnergy + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;
        dEdA = db*dEdA + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;
        dEdB = da*dEdB + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;
        torsionEnergy = da*torsionEnergy + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;
        dEdA = db*dEdA + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;
        dEdB = da*dEdB + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;
        torsionEnergy = da*torsionEnergy + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;
        dEdA = db*dEdA + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;
        dEdB = da*dEdB + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;
        dEdA /= delta;
        dEdB /= delta;
        energy += torsionEnergy;

        // Apply the force to the first torsion.

        float normCross1 = dot(cp0a, cp0a);
        float normSqrBC = dot(v1a, v1a);
        float normBC = sqrt(normSqrBC);
        float normCross2 = dot(cp1a, cp1a);
        float dp = 1.0f/normSqrBC;
        float4 ff = (float4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
        float4 internalF0 = ff.x*cp0a;
        float4 internalF3 = ff.w*cp1a;
        float4 d = ff.y*internalF0 - ff.z*internalF3;
        int4 offset = atoms.lo.lo + numAtoms*atoms.hi.lo;
        float4 forceA = forceBuffers[offset.x];
        float4 forceB = forceBuffers[offset.y];
        float4 forceC = forceBuffers[offset.z];
        float4 forceD = forceBuffers[offset.w];
        forceA.xyz += internalF0.xyz;
        forceB.xyz += d.xyz-internalF0.xyz;
        forceC.xyz += -d.xyz-internalF3.xyz;
        forceD.xyz += internalF3.xyz;
        forceBuffers[offset.x] = forceA;
        forceBuffers[offset.y] = forceB;
        forceBuffers[offset.z] = forceC;
        forceBuffers[offset.w] = forceD;

        // Apply the force to the second torsion.

        normCross1 = dot(cp0b, cp0b);
        normSqrBC = dot(v1b, v1b);
        normBC = sqrt(normSqrBC);
        normCross2 = dot(cp1b, cp1b);
        dp = 1.0f/normSqrBC;
        ff = (float4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
        internalF0 = ff.x*cp0b;
        internalF3 = ff.w*cp1b;
        d = ff.y*internalF0 - ff.z*internalF3;
        offset = atoms.lo.hi + numAtoms*atoms.hi.hi;
        forceA = forceBuffers[offset.x];
        forceB = forceBuffers[offset.y];
        forceC = forceBuffers[offset.z];
        forceD = forceBuffers[offset.w];
        forceA.xyz += internalF0.xyz;
        forceB.xyz += d.xyz-internalF0.xyz;
        forceC.xyz += -d.xyz-internalF3.xyz;
        forceD.xyz += internalF3.xyz;
        forceBuffers[offset.x] = forceA;
        forceBuffers[offset.y] = forceB;
        forceBuffers[offset.z] = forceC;
        forceBuffers[offset.w] = forceD;
    }
    energyBuffer[get_global_id(0)] += energy;
}