ccma.cl 5.66 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/**
 * Compute the direction each constraint is pointing in.  This is called once at the beginning of constraint evaluation.
 */
__kernel void computeConstraintDirections(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
        __global int* converged) {
    for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
        // Compute the direction for this constraint.

        int2 atoms = constraintAtoms[index];
        float4 dir = constraintDistance[index];
        float4 oldPos1 = atomPositions[atoms.x];
        float4 oldPos2 = atomPositions[atoms.y];
        dir.x = oldPos1.x-oldPos2.x;
        dir.y = oldPos1.y-oldPos2.y;
        dir.z = oldPos1.z-oldPos2.z;
        constraintDistance[index] = dir;
    }

    // Mark that no work groups have converged yet.

    for (int index = get_global_id(0); index < get_num_groups(0); index += get_global_size(0))
        converged[index] = false;
}

/**
 * Compute the force applied by each constraint.
 */
__kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions,
        __global float* reducedMass, __global float* delta1, __global int* converged, __local int* convergedBuffer, float tol) {
    if (converged[get_group_id(0)])
        return; // The constraint iteration has already converged.
    float lowerTol = 1.0f-2.0f*tol+tol*tol;
    float upperTol = 1.0f+2.0f*tol+tol*tol;
    int threadConverged = 1;
    for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
        // Compute the force due to this constraint.

        int2 atoms = constraintAtoms[index];
        float4 dir = constraintDistance[index];
        float4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y];
        rp_ij.xyz += dir.xyz;
        float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
        float dist2 = dir.w*dir.w;
        float diff = dist2 - rp2;
        float rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
        float d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
        delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);

        // See whether it has converged.

        threadConverged &= (rp2 >= lowerTol*dist2 && rp2 <= upperTol*dist2);
    }

    // Perform a parallel reduction to see if all constraints handled by this work group have converged.

    convergedBuffer[get_local_id(0)] = threadConverged;
    barrier(CLK_LOCAL_MEM_FENCE);
    for (int step = 1; step < get_local_size(0); step *= 2) {
        if (get_local_id(0)%(2*step) == 0)
            convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step];
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    if (get_local_id(0) == 0)
        converged[get_group_id(0)] = convergedBuffer[0];
}

/**
 * Multiply the vector of constraint forces by the constraint matrix.
 */
__kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn,
        __global float* constraintMatrixValue, __global int* converged, __local int* convergedBuffer) {
    // First see whether all work groups have converged.

    convergedBuffer[get_local_id(0)] = true;
    for (int index = get_local_id(0); index < get_num_groups(0); index += get_local_size(0))
        convergedBuffer[get_local_id(0)] &= converged[index];
    barrier(CLK_LOCAL_MEM_FENCE);
    for (int step = 1; step < get_local_size(0); step *= 2) {
        if (get_local_id(0)%(2*step) == 0)
            convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step];
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    if (get_local_id(0) == 0)
        converged[get_group_id(0)] = convergedBuffer[0];
    if (converged[0])
        return; // The constraint iteration has already converged.

    // Multiply by the inverse constraint matrix.

    for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
        float sum = 0.0f;
        for (int i = 0; ; i++) {
            int element = index+i*NUM_CONSTRAINTS;
            int column = constraintMatrixColumn[element];
            if (column >= NUM_CONSTRAINTS)
                break;
            sum += delta1[column]*constraintMatrixValue[element];
        }
        delta2[index] = sum;
    }
}

/**
 * Update the atom positions based on constraint forces.
 */
__kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance,
        __global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) {
    if (converged[get_group_id(0)])
        return; // The constraint iteration has already converged.
    float damping = (iteration < 2 ? 0.5f : 1.0f);
    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
        // Compute the new position of this atom.

        float4 atomPos = atomPositions[index];
        float invMass = velm[index].w;
        int num = numAtomConstraints[index];
        for (int i = 0; i < num; i++) {
            int constraint = atomConstraints[index+i*NUM_ATOMS];
            bool forward = (constraint > 0);
            constraint = (forward ? constraint-1 : -constraint-1);
            float constraintForce = damping*invMass*delta2[constraint];
            constraintForce = (forward ? constraintForce : -constraintForce);
            float4 dir = constraintDistance[constraint];
            atomPos.x += constraintForce*dir.x;
            atomPos.y += constraintForce*dir.y;
            atomPos.z += constraintForce*dir.z;
        }
        atomPositions[index] = atomPos;
    }
}