pme_cpu.cl 8.74 KB
Newer Older
1
2
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global real4* restrict pmeBsplineDTheta) {
    const real4 scale = 1.0f/(PME_ORDER-1);
3
    for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
4
5
        __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
        __local real4* ddata = &bsplinesCache[get_local_id(0)*PME_ORDER + get_local_size(0)*PME_ORDER];
6
        for (int j = 0; j < PME_ORDER; j++) {
7
8
	    data[j] = 0;
            ddata[j] = 0;
9
        }
10
        real4 pos = posq[i];
11
12
13
        pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
        pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
        pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
14
        real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
15
                             (pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
16
17
18
                             (pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0);
        real4 dr = (real4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0);
        data[PME_ORDER-1] = 0;
19
20
21
        data[1] = dr;
        data[0] = 1.0f-dr;
        for (int j = 3; j < PME_ORDER; j++) {
22
            real div = 1.0f/(j-1.0f);
23
24
            data[j-1] = div*dr*data[j-2];
            for (int k = 1; k < (j-1); k++)
25
                data[j-k-1] = div*((dr+(real4) k) *data[j-k-2] + (-dr+(real4) (j-k))*data[j-k-1]);
26
27
28
29
30
31
32
            data[0] = div*(- dr+1.0f)*data[0];
        }
        ddata[0] = -data[0];
        for (int j = 1; j < PME_ORDER; j++)
            ddata[j] = data[j-1]-data[j];
        data[PME_ORDER-1] = scale*dr*data[PME_ORDER-2];
        for (int j = 1; j < (PME_ORDER-1); j++)
33
            data[PME_ORDER-j-1] = scale*((dr+(real4) j)*data[PME_ORDER-j-2] + (-dr+(real4) (PME_ORDER-j))*data[PME_ORDER-j-1]);
34
35
36
37
38
39
40
41
42
43
44
        data[0] = scale*(-dr+1.0f)*data[0];
        for (int j = 0; j < PME_ORDER; j++) {
            pmeBsplineTheta[i+j*NUM_ATOMS] = data[j];
            pmeBsplineDTheta[i+j*NUM_ATOMS] = ddata[j];
        }
    }
}

/**
 * This kernel is not actually used when running on a CPU.
 */
45
__kernel void findAtomRangeForGrid(__global const int2* restrict pmeAtomGridIndex, __global int* restrict pmeAtomRange, __global const real4* restrict posq, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
46
47
}

48
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __global real2* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
49
50
51
    const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
    const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
    for (int gridIndex = firstx*GRID_SIZE_Y*GRID_SIZE_Z; gridIndex < lastx*GRID_SIZE_Y*GRID_SIZE_Z; gridIndex++)
52
        pmeGrid[gridIndex] = (real2) 0;
53
    for (int atom = 0; atom < NUM_ATOMS; atom++) {
54
        real4 pos = posq[atom];
55
56
57
        pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
        pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
        pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
58
        real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
59
                             (pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
60
61
                             (pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0);
        real4 dr = (real4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0);
62
63
64
        int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
                                 ((int) t.y) % GRID_SIZE_Y,
                                 ((int) t.z) % GRID_SIZE_Z, 0);
65
        real atomCharge = pos.w*EPSILON_FACTOR;
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
        for (int ix = 0; ix < PME_ORDER; ix++) {
            int xindex = gridIndex.x+ix;
            xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
            if (xindex < firstx || xindex >= lastx)
                continue;
            for (int iy = 0; iy < PME_ORDER; iy++) {
                int yindex = gridIndex.y+iy;
                yindex -= (yindex >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
                for(int iz = 0; iz < PME_ORDER; iz++) {
                    int zindex = gridIndex.z+iz;
                    zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
                    int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
                    pmeGrid[index].x += atomCharge*pmeBsplineTheta[atom+ix*NUM_ATOMS].x*pmeBsplineTheta[atom+iy*NUM_ATOMS].y*pmeBsplineTheta[atom+iz*NUM_ATOMS].z;
                }
            }
        }
    }
}

85
86
__kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global real* restrict energyBuffer, __global const real* restrict pmeBsplineModuliX,
        __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 invPeriodicBoxSize, real recipScaleFactor) {
87
    const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
88
    real energy = 0;
89
90
91
92
93
94
95
96
97
98
    for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
        int kx = index/(GRID_SIZE_Y*GRID_SIZE_Z);
        int remainder = index-kx*GRID_SIZE_Y*GRID_SIZE_Z;
        int ky = remainder/GRID_SIZE_Z;
        int kz = remainder-ky*GRID_SIZE_Z;
        if (kx == 0 && ky == 0 && kz == 0)
            continue;
        int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
        int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
        int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
99
100
101
102
103
104
105
106
107
108
109
        real mhx = mx*invPeriodicBoxSize.x;
        real mhy = my*invPeriodicBoxSize.y;
        real mhz = mz*invPeriodicBoxSize.z;
        real bx = pmeBsplineModuliX[kx];
        real by = pmeBsplineModuliY[ky];
        real bz = pmeBsplineModuliZ[kz];
        real2 grid = pmeGrid[index];
        real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
        real denom = m2*bx*by*bz;
        real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
        pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
110
111
112
113
114
        energy += eterm*(grid.x*grid.x + grid.y*grid.y);
    }
    energyBuffer[get_global_id(0)] += 0.5f*energy;
}

115
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real2* restrict pmeGrid, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict pmeBsplineTheta, __global const real4* restrict pmeBsplineDTheta) {
116
    for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
117
118
        real4 force = 0;
        real4 pos = posq[atom];
119
120
121
        pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
        pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
        pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
122
123
124
        real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
                           (pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
                           (pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0);
125
126
127
128
129
130
        int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
                                 ((int) t.y) % GRID_SIZE_Y,
                                 ((int) t.z) % GRID_SIZE_Z, 0);
        for (int ix = 0; ix < PME_ORDER; ix++) {
            int xindex = gridIndex.x+ix;
            xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
131
132
            real tx = pmeBsplineTheta[atom+ix*NUM_ATOMS].x;
            real dtx = pmeBsplineDTheta[atom+ix*NUM_ATOMS].x;
133
134
135
            for (int iy = 0; iy < PME_ORDER; iy++) {
                int yindex = gridIndex.y+iy;
                yindex -= (yindex >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
136
137
                real ty = pmeBsplineTheta[atom+iy*NUM_ATOMS].y;
                real dty = pmeBsplineDTheta[atom+iy*NUM_ATOMS].y;
138
139
140
                for (int iz = 0; iz < PME_ORDER; iz++) {
                    int zindex = gridIndex.z+iz;
                    zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
141
142
                    real tz = pmeBsplineTheta[atom+iz*NUM_ATOMS].z;
                    real dtz = pmeBsplineDTheta[atom+iz*NUM_ATOMS].z;
143
                    int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
144
                    real gridvalue = pmeGrid[index].x;
145
146
147
148
149
150
                    force.x += dtx*ty*tz*gridvalue;
                    force.y += tx*dty*tz*gridvalue;
                    force.z += tx*ty*dtz*gridvalue;
                }
            }
        }
151
152
        real4 totalForce = forceBuffers[atom];
        real q = pos.w*EPSILON_FACTOR;
153
154
155
156
157
158
        totalForce.x -= q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x;
        totalForce.y -= q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y;
        totalForce.z -= q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z;
        forceBuffers[atom] = totalForce;
    }
}