utilities.cl 4.98 KB
Newer Older
1
2
3
4
/**
 * Fill a buffer with 0.
 */

5
__kernel void clearBuffer(__global int* restrict buffer, int size) {
6
    int index = get_global_id(0);
7
    __global int4* buffer4 = (__global int4*) buffer;
8
9
    int sizeDiv4 = size/4;
    while (index < sizeDiv4) {
10
        buffer4[index] = (int4) 0;
11
        index += get_global_size(0);
12
13
14
    }
    if (get_global_id(0) == 0)
        for (int i = sizeDiv4*4; i < size; i++)
15
            buffer[i] = 0;
16
}
17

18
19
20
/**
 * Fill two buffers with 0.
 */
21
__kernel void clearTwoBuffers(__global int* restrict buffer1, int size1, __global int* restrict buffer2, int size2) {
22
23
24
25
26
27
28
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
}

/**
 * Fill three buffers with 0.
 */
29
__kernel void clearThreeBuffers(__global int* restrict buffer1, int size1, __global int* restrict buffer2, int size2, __global int* restrict buffer3, int size3) {
30
31
32
33
34
35
36
37
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
}

/**
 * Fill four buffers with 0.
 */
38
__kernel void clearFourBuffers(__global int* restrict buffer1, int size1, __global int* restrict buffer2, int size2, __global int* restrict buffer3, int size3, __global int* restrict buffer4, int size4) {
39
40
41
42
43
44
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
    clearBuffer(buffer4, size4);
}

45
46
47
/**
 * Fill five buffers with 0.
 */
48
__kernel void clearFiveBuffers(__global int* restrict buffer1, int size1, __global int* restrict buffer2, int size2, __global int* restrict buffer3, int size3, __global int* restrict buffer4, int size4, __global int* restrict buffer5, int size5) {
49
50
51
52
53
54
55
56
57
58
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
    clearBuffer(buffer4, size4);
    clearBuffer(buffer5, size5);
}

/**
 * Fill six buffers with 0.
 */
59
__kernel void clearSixBuffers(__global int* restrict buffer1, int size1, __global int* restrict buffer2, int size2, __global int* restrict buffer3, int size3, __global int* restrict buffer4, int size4, __global int* restrict buffer5, int size5, __global int* restrict buffer6, int size6) {
60
61
62
63
64
65
66
67
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
    clearBuffer(buffer4, size4);
    clearBuffer(buffer5, size5);
    clearBuffer(buffer6, size6);
}

68
69
70
71
/**
 * Sum a collection of buffers into the first one.
 */

72
__kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize, int numBuffers) {
73
74
75
    int index = get_global_id(0);
    int totalSize = bufferSize*numBuffers;
    while (index < bufferSize) {
76
        real4 sum = buffer[index];
77
78
79
        for (int i = index+bufferSize; i < totalSize; i += bufferSize)
            sum += buffer[i];
        buffer[index] = sum;
80
        index += get_global_size(0);
81
82
    }
}
83

84
85
86
/**
 * Sum the various buffers containing forces.
 */
87
__kernel void reduceForces(__global long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) {
88
    int totalSize = bufferSize*numBuffers;
89
    real scale = 1/(real) 0x100000000;
90
    for (int index = get_global_id(0); index < bufferSize; index += get_global_size(0)) {
91
#ifdef SUPPORTS_64_BIT_ATOMICS
92
        real4 sum = (real4) (scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0);
93
94
95
#else
        real4 sum = (real4) 0;
#endif
96
97
98
        for (int i = index; i < totalSize; i += bufferSize)
            sum += buffer[i];
        buffer[index] = sum;
99
100
101
        longBuffer[index] = (long) (sum.x*0x100000000);
        longBuffer[index+bufferSize] = (long) (sum.y*0x100000000);
        longBuffer[index+2*bufferSize] = (long) (sum.z*0x100000000);
102
103
104
    }
}

Peter Eastman's avatar
Peter Eastman committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/**
 * Sum the energy buffer.
 */
__kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global mixed* restrict result, int bufferSize, int workGroupSize, __local mixed* tempBuffer) {
    const unsigned int thread = get_local_id(0);
    mixed sum = 0;
    for (unsigned int index = thread; index < bufferSize; index += get_local_size(0))
        sum += energyBuffer[index];
    tempBuffer[thread] = sum;
    for (int i = 1; i < workGroupSize; i *= 2) {
        barrier(CLK_LOCAL_MEM_FENCE);
        if (thread%(i*2) == 0 && thread+i < workGroupSize)
            tempBuffer[thread] += tempBuffer[thread+i];
    }
    if (thread == 0)
        *result = tempBuffer[0];
}

123
/**
124
 * This is called to determine the accuracy of various native functions.
125
126
 */

127
__kernel void determineNativeAccuracy(__global float8* restrict values, int numValues) {
Peter Eastman's avatar
Peter Eastman committed
128
    for (int i = get_global_id(0); i < numValues; i += get_global_size(0)) {
129
130
        float v = values[i].s0;
        values[i] = (float8) (v, native_sqrt(v), native_rsqrt(v), native_recip(v), native_exp(v), native_log(v), 0.0f, 0.0f);
131
    }
132
}
133
134
135
136
137
138
139
140

/**
 * Record the atomic charges into the posq array.
 */
__kernel void setCharges(__global real* restrict charges, __global real4* restrict posq, __global int* restrict atomOrder, int numAtoms) {
    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0))
        posq[i].w = charges[atomOrder[i]];
}