shakeHydrogens.cl 5.06 KB
Newer Older
1
2
3
4
/**
 * Enforce constraints on SHAKE clusters
 */

5
__kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.

        int4 atoms = clusterAtoms[index];
        float4 params = clusterParams[index];
        float4 pos = oldPos[atoms.x];
        float4 xpi = posDelta[atoms.x];
        float4 pos1 = oldPos[atoms.y];
        float4 xpj1 = posDelta[atoms.y];
        float4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
        float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
        float invMassCentral = params.x;
        float avgMass = params.y;
        float d2 = params.z;
        float invMassPeripheral = params.w;
        if (atoms.z != -1) {
            pos2 = oldPos[atoms.z];
            xpj2 = posDelta[atoms.z];
        }
        float4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
        float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
        if (atoms.w != -1) {
            pos3 = oldPos[atoms.w];
            xpj3 = posDelta[atoms.w];
        }

        // Precompute quantities.

        float4 rij1 = pos-pos1;
        float4 rij2 = pos-pos2;
        float4 rij3 = pos-pos3;
        float rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
        float rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
        float rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
        float ld1 = d2-rij1sq;
        float ld2 = d2-rij2sq;
        float ld3 = d2-rij3sq;

        // Iterate until convergence.

Peter Eastman's avatar
Peter Eastman committed
47
        int converged = false;
48
49
50
        int iteration = 0;
        while (iteration < 15 && !converged) {
            converged = true;
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#ifdef CONSTRAIN_VELOCITIES
            float4 rpij = xpi-xpj1;
            float rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
            float delta = -2.0f*avgMass*rrpr/rij1sq;
            float4 dr = rij1*delta;
            xpi.xyz += dr.xyz*invMassCentral;
            xpj1.xyz -= dr.xyz*invMassPeripheral;
            if (fabs(delta) > tol)
                converged = false;
            if (atoms.z != -1) {
                rpij = xpi-xpj2;
                rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z;
                delta = -2.0f*avgMass*rrpr/rij2sq;
                dr = rij2*delta;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj2.xyz -= dr.xyz*invMassPeripheral;
                if (fabs(delta) > tol)
                    converged = false;
            }
            if (atoms.w != -1) {
                rpij = xpi-xpj3;
                rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z;
                delta = -2.0f*avgMass*rrpr/rij3sq;
                dr = rij3*delta;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj3.xyz -= dr.xyz*invMassPeripheral;
                if (fabs(delta) > tol)
                    converged = false;
            }
#else
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
            float4 rpij = xpi-xpj1;
            float rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
            float rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
            float diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
            if (diff >= 1.0f) {
                float acor  = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
                float4 dr = rij1*acor;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj1.xyz -= dr.xyz*invMassPeripheral;
                converged = false;
            }
            if (atoms.z != -1) {
                rpij.xyz = xpi.xyz-xpj2.xyz;
                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
                rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
                diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
                    float acor  = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
                    float4 dr = rij2*acor;
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj2.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;
                }
            }
            if (atoms.w != -1) {
                rpij.xyz = xpi.xyz-xpj3.xyz;
                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
                rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
                diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
                    float acor  = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
                    float4 dr = rij3*acor;
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj3.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;
                }
            }
118
#endif
119
120
121
122
123
            iteration++;
        }

        // Record the new positions.

124
125
        posDelta[atoms.x] = xpi;
        posDelta[atoms.y] = xpj1;
126
        if (atoms.z != -1)
127
            posDelta[atoms.z] = xpj2;
128
        if (atoms.w != -1)
129
            posDelta[atoms.w] = xpj3;
130
131
132
        index += get_global_size(0);
    }
}