shakeHydrogens.cl 5.53 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
#ifdef USE_MIXED_PRECISION
    real4 pos1 = posq[index];
    real4 pos2 = posqCorrection[index];
    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
    return posq[index];
#endif
}

11
12
13
14
/**
 * Enforce constraints on SHAKE clusters
 */

15
__kernel void applyShakeToHydrogens(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
16
17
18
19
20
21
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.

        int4 atoms = clusterAtoms[index];
        float4 params = clusterParams[index];
22
23
24
25
26
27
        mixed4 pos = loadPos(oldPos, posCorrection, atoms.x);
        mixed4 xpi = posDelta[atoms.x];
        mixed4 pos1 = loadPos(oldPos, posCorrection, atoms.y);
        mixed4 xpj1 = posDelta[atoms.y];
        mixed4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
        mixed4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
28
29
30
31
32
        float invMassCentral = params.x;
        float avgMass = params.y;
        float d2 = params.z;
        float invMassPeripheral = params.w;
        if (atoms.z != -1) {
33
            pos2 = loadPos(oldPos, posCorrection, atoms.z);
34
35
            xpj2 = posDelta[atoms.z];
        }
36
37
        mixed4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
        mixed4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
38
        if (atoms.w != -1) {
39
            pos3 = loadPos(oldPos, posCorrection, atoms.w);
40
41
42
43
44
            xpj3 = posDelta[atoms.w];
        }

        // Precompute quantities.

45
46
47
48
49
50
51
52
53
        mixed4 rij1 = pos-pos1;
        mixed4 rij2 = pos-pos2;
        mixed4 rij3 = pos-pos3;
        mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
        mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
        mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
        mixed ld1 = d2-rij1sq;
        mixed ld2 = d2-rij2sq;
        mixed ld3 = d2-rij3sq;
54
55
56

        // Iterate until convergence.

Peter Eastman's avatar
Peter Eastman committed
57
        int converged = false;
58
59
60
        int iteration = 0;
        while (iteration < 15 && !converged) {
            converged = true;
61
#ifdef CONSTRAIN_VELOCITIES
62
63
64
65
            mixed4 rpij = xpi-xpj1;
            mixed rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
            mixed delta = -2.0f*avgMass*rrpr/rij1sq;
            mixed4 dr = rij1*delta;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
            xpi.xyz += dr.xyz*invMassCentral;
            xpj1.xyz -= dr.xyz*invMassPeripheral;
            if (fabs(delta) > tol)
                converged = false;
            if (atoms.z != -1) {
                rpij = xpi-xpj2;
                rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z;
                delta = -2.0f*avgMass*rrpr/rij2sq;
                dr = rij2*delta;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj2.xyz -= dr.xyz*invMassPeripheral;
                if (fabs(delta) > tol)
                    converged = false;
            }
            if (atoms.w != -1) {
                rpij = xpi-xpj3;
                rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z;
                delta = -2.0f*avgMass*rrpr/rij3sq;
                dr = rij3*delta;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj3.xyz -= dr.xyz*invMassPeripheral;
                if (fabs(delta) > tol)
                    converged = false;
            }
#else
91
92
93
94
            mixed4 rpij = xpi-xpj1;
            mixed rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
            mixed rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
            mixed diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
95
            if (diff >= 1.0f) {
96
97
                mixed acor  = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
                mixed4 dr = rij1*acor;
98
99
100
101
102
103
104
105
106
107
                xpi.xyz += dr.xyz*invMassCentral;
                xpj1.xyz -= dr.xyz*invMassPeripheral;
                converged = false;
            }
            if (atoms.z != -1) {
                rpij.xyz = xpi.xyz-xpj2.xyz;
                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
                rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
                diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
108
109
                    mixed acor  = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
                    mixed4 dr = rij2*acor;
110
111
112
113
114
115
116
117
118
119
120
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj2.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;
                }
            }
            if (atoms.w != -1) {
                rpij.xyz = xpi.xyz-xpj3.xyz;
                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
                rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
                diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
121
122
                    mixed acor  = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
                    mixed4 dr = rij3*acor;
123
124
125
126
127
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj3.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;
                }
            }
128
#endif
129
130
131
132
133
            iteration++;
        }

        // Record the new positions.

134
135
        posDelta[atoms.x] = xpi;
        posDelta[atoms.y] = xpj1;
136
        if (atoms.z != -1)
137
            posDelta[atoms.z] = xpj2;
138
        if (atoms.w != -1)
139
            posDelta[atoms.w] = xpj3;
140
141
142
        index += get_global_size(0);
    }
}