rmsd.cl 3.44 KB
Newer Older
peastman's avatar
peastman committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// This file contains kernels to compute the RMSD and its gradient using the algorithm described
// in Coutsias et al, "Using quaternions to calculate RMSD" (doi: 10.1002/jcc.20110).

/**
 * Sum a value over all threads.
 */
real reduceValue(real value, __local real* temp) {
    const int thread = get_local_id(0);
    temp[thread] = value;
    barrier(CLK_LOCAL_MEM_FENCE);
    for (uint step = 1; step < get_local_size(0); step *= 2) {
        if (thread+step < get_local_size(0) && thread%(2*step) == 0)
            temp[thread] = temp[thread] + temp[thread+step];
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    return temp[0];
}

/**
 * Perform the first step of computing the RMSD.  This is executed as a single work group.
 */
__kernel void computeRMSDPart1(int numParticles, __global const real4* restrict posq, __global const real4* restrict referencePos,
        __global const int* restrict particles, __global real* buffer, __local real* restrict temp) {
    // Compute the center of the particle positions.
    
    real3 center = (real3) 0;
    for (int i = get_local_id(0); i < numParticles; i += get_local_size(0))
        center += posq[particles[i]].xyz;
    center.x = reduceValue(center.x, temp)/numParticles;
    center.y = reduceValue(center.y, temp)/numParticles;
    center.z = reduceValue(center.z, temp)/numParticles;
    
    // Compute the correlation matrix.
    
    real R[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
    real sum = 0;
    for (int i = get_local_id(0); i < numParticles; i += get_local_size(0)) {
        int index = particles[i];
        real3 pos = posq[index].xyz - center;
        real3 refPos = referencePos[index].xyz;
        R[0][0] += pos.x*refPos.x;
        R[0][1] += pos.x*refPos.y;
        R[0][2] += pos.x*refPos.z;
        R[1][0] += pos.y*refPos.x;
        R[1][1] += pos.y*refPos.y;
        R[1][2] += pos.y*refPos.z;
        R[2][0] += pos.z*refPos.x;
        R[2][1] += pos.z*refPos.y;
        R[2][2] += pos.z*refPos.z;
        sum += dot(pos, pos);
    }
    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 3; j++)
            R[i][j] = reduceValue(R[i][j], temp);
    sum = reduceValue(sum, temp);

    // Copy everything into the output buffer to send back to the host.
    
    if (get_local_id(0) == 0) {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                buffer[3*i+j] = R[i][j];
        buffer[9] = sum;
        buffer[10] = center.x;
        buffer[11] = center.y;
        buffer[12] = center.z;
    }
}

/**
 * Apply forces based on the RMSD.
 */
__kernel void computeRMSDForces(int numParticles, __global const real4* restrict posq, __global const real4* restrict referencePos,
        __global const int* restrict particles, __global const real* buffer, __global real4* restrict forceBuffers) {
    real3 center = (real3) (buffer[10], buffer[11], buffer[12]);
    real scale = 1 / (real) (buffer[9]*numParticles);
    for (int i = get_global_id(0); i < numParticles; i += get_global_size(0)) {
        int index = particles[i];
        real3 pos = posq[index].xyz - center;
        real3 refPos = referencePos[index].xyz;
        real3 rotatedRef = (real3) (buffer[0]*refPos[0] + buffer[3]*refPos[1] + buffer[6]*refPos[2],
                                    buffer[1]*refPos[0] + buffer[4]*refPos[1] + buffer[7]*refPos[2],
                                    buffer[2]*refPos[0] + buffer[5]*refPos[1] + buffer[8]*refPos[2]);
        forceBuffers[index].xyz -= (pos-rotatedRef)*scale;
    }
}