"...amoeba/platforms/cuda2/include/AmoebaCudaKernelFactory.h" did not exist on "437ca02f29067f5eb3b295998feb670b3f7f3f51"
customIntegrator.cl 1.01 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Add up the first 3*NUM_ATOMS floats of sumBuffer and store the total in result[outputIndex].
 *
 * Every work-item first accumulates a private partial sum over the elements
 * localId, localId + localSize, localId + 2*localSize, ...  The partial sums are then
 * combined pairwise through local memory, and the work-item with local id 0 writes the
 * final value.
 *
 * NOTE(review): the tree combine reads WORK_GROUP_SIZE slots of local memory, so the kernel
 * presumably expects the local work size to equal WORK_GROUP_SIZE -- confirm against the host code.
 *
 * @param sumBuffer    values to be summed (read only)
 * @param result       output array; only element outputIndex is written
 * @param outputIndex  index in result that receives the sum
 */
__kernel void computeSum(__global const float* restrict sumBuffer, __global float* result, unsigned int outputIndex) {
    __local float partialSums[WORK_GROUP_SIZE];
    const unsigned int localId = get_local_id(0);

    // Private strided accumulation over the input values.
    float accumulated = 0.0f;
    unsigned int element = localId;
    while (element < 3*NUM_ATOMS) {
        accumulated += sumBuffer[element];
        element += get_local_size(0);
    }
    partialSums[localId] = accumulated;

    // Pairwise combine: at each step, slots that are multiples of 2*offset absorb the slot
    // "offset" positions to their right.  The barrier at the top of every pass makes the
    // previous pass's writes (or the initial stores) visible before they are read.
    for (unsigned int offset = 1; offset < WORK_GROUP_SIZE; offset <<= 1) {
        barrier(CLK_LOCAL_MEM_FENCE);
        // offset is a power of two, so the mask test is the same as localId % (2*offset) == 0.
        const bool isReceiver = ((localId & (2*offset-1)) == 0);
        if (isReceiver && localId+offset < WORK_GROUP_SIZE)
            partialSums[localId] += partialSums[localId+offset];
    }

    // Slot 0 now holds the complete sum; it was last written by this same work-item.
    if (localId == 0)
        result[outputIndex] = partialSums[0];
}
16
17
18
19
20
21

/**
 * Add the pending position deltas to the atom positions and then clear the deltas.
 *
 * For each atom index below NUM_ATOMS, the x, y and z components of posDelta are added to the
 * matching components of posq; the w component of posq is left unchanged.  The whole posDelta
 * entry (all four components) is then reset to zero.
 *
 * The loop starts at the work-item's global id and advances by the global size, so each atom is
 * handled by exactly one work-item regardless of how many work-groups are launched.  (Starting
 * from the local id instead would make every work-group walk the same indices and skip others.)
 *
 * @param posq      atom positions; xyz components are updated in place
 * @param posDelta  per-atom position deltas; consumed and zeroed by this kernel
 */
__kernel void applyPositionDeltas(__global float4* restrict posq, __global float4* restrict posDelta) {
    for (unsigned int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
        float4 position = posq[index];
        position.xyz += posDelta[index].xyz;
        posq[index] = position;
        // Clear the delta so it is not applied a second time.
        posDelta[index] = (float4) 0.0f;
    }
}