kCalculateCDLJEwaldFastReciprocal.h 7.06 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Rossen P. Apostolov, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

  /* Define multiply operations for floats */
  
  __device__ float2 MultofFloat2(float2 a, float2 b)
  {
       float2 c;
       c.x = a.x * b.x - a.y * b.y;
       c.y = a.x * b.y + a.y * b.x;
       return c;
  
  }
  
  __device__ float2 ConjMultofFloat2(float2 a, float2 b)
  {
       float2 c;
       c.x = a.x*b.x + a.y*b.y;
       c.y = a.y*b.x - a.x*b.y;
       return c;
  
  }
  
__global__ void kCalculateEwaldFastEikr_kernel()
{

50
    int kmax = cSim.kmax;
51
52
53
54
55
56
57
    float4 apos;

    unsigned int atom    = threadIdx.x + blockIdx.x * blockDim.x;

    while (atom < cSim.atoms)
    {

58
59
          apos                         = cSim.pPosq[atom];

60
61
62
63
64
//generic form of the array
// pEikr[ atomID*kmax*3 + k*3 + m]


// k = 0, explicitly
65
        for(unsigned int m = 0; (m < 3); m++) {
66
67
          cSim.pEwaldEikr[atom*kmax*3 + 0 + m].x = 1;
          cSim.pEwaldEikr[atom*kmax*3 + 0 + m].y = 0;
68
        }
69
// k = 1, explicitly
70
71
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 0].x = cos(apos.x*cSim.recipBoxSizeX);
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 0].y = sin(apos.x*cSim.recipBoxSizeX);
72

73
74
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 1].x = cos(apos.y*cSim.recipBoxSizeY);
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 1].y = sin(apos.y*cSim.recipBoxSizeY);
75

76
77
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 2].x = cos(apos.z*cSim.recipBoxSizeZ);
          cSim.pEwaldEikr[atom*kmax*3 + 3 + 2].y = sin(apos.z*cSim.recipBoxSizeZ);
78
79
80
81

// k > 1, by recursion
        for(unsigned int k=2; (k<kmax); k++) {
          for(unsigned int m=0; (m<3); m++) {
82
            cSim.pEwaldEikr[atom*kmax*3 + k*3 + m] = MultofFloat2 (cSim.pEwaldEikr[atom*kmax*3 + (k-1)*3 + m] , cSim.pEwaldEikr[atom*kmax*3 + 3 + m]);
83
84
85
86
87
88
89
          }
        }

        atom                            += blockDim.x * gridDim.x;
    }
}

90
91
__global__ void kCalculateEwaldFastCosSinSums_kernel()
{
92
93
94
95
    const unsigned int ksize = 2*cSim.kmax-1;
    const unsigned int totalK = ksize*ksize*ksize;
    unsigned int index = threadIdx.x + blockIdx.x * blockDim.x;
    while (index < totalK)
96
    {
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
        int rx = index/(ksize*ksize);
        int remainder = index - rx*ksize*ksize;
        int ry = remainder/ksize;
        int rz = remainder - ry*ksize - cSim.kmax + 1;
        ry += -cSim.kmax + 1;
        float2 sum = make_float2(0.0f, 0.0f);
        for (int atom = 0; atom < cSim.atoms; atom++)
        {
            float2 tab_xy = (ry >= 0 ? MultofFloat2 (cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 + ry*3 + 1]) :
                                       ConjMultofFloat2 (cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 - ry*3 + 1]));
            float charge = cSim.pPosq[atom].w;
            float2 structureFactor = (rz >= 0 ? MultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 + rz*3 + 2]) :
                                                ConjMultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 - rz*3 + 2]));
            sum.x += charge*structureFactor.x;
            sum.y += charge*structureFactor.y;
112
        }
113
114
        cSim.pEwaldCosSinSum[index] = sum;
        index += blockDim.x * gridDim.x;
115
116
117
118
119
120
    }
}

__global__ void kCalculateEwaldFastForces_kernel()
{

121
122
123
    float PI = 3.14159265358979323846f;
    const float epsilon =  1.0;
    float recipCoeff = cSim.epsfac*(4*PI/cSim.cellVolume/epsilon);
124
125
126

    int lowry = 0;
    int lowrz = 1;
127
128
129
    const int numRx = cSim.kmax;
    const int numRy = cSim.kmax;
    const int numRz = cSim.kmax;
130

131
    unsigned int atom = threadIdx.x + blockIdx.x * blockDim.x;
132
133
134

    while (atom < cSim.atoms)
    {
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
        float charge = cSim.pPosq[atom].w;
        for (int rx = 0; rx < numRx; rx++) {
            float kx = rx * cSim.recipBoxSizeX;
            for (int ry = lowry; ry < numRy; ry++) {
                float ky = ry * cSim.recipBoxSizeY;
                float2 tab_xy = (ry >= 0 ? MultofFloat2(cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 + ry*3 + 1]) :
                                           ConjMultofFloat2(cSim.pEwaldEikr[atom*cSim.kmax*3 + rx*3 + 0], cSim.pEwaldEikr[atom*cSim.kmax*3 - ry*3 + 1]));
                for (int rz = lowrz; rz < numRz; rz++) {
                    float kz = rz * cSim.recipBoxSizeZ;
                    int index = rx*(numRy*2-1)*(numRz*2-1) + (ry+numRy-1)*(numRz*2-1) + (rz+numRz-1);
                    float k2 = kx*kx + ky*ky + kz*kz;
                    float ak = exp(k2*cSim.factorEwald) / k2;
                    float2 structureFactor = (rz >= 0 ? MultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 + rz*3 + 2]) :
                                                        ConjMultofFloat2(tab_xy, cSim.pEwaldEikr[atom*cSim.kmax*3 - rz*3 + 2]));
                    float dEdR = ak * charge * (cSim.pEwaldCosSinSum[index].x*structureFactor.y - cSim.pEwaldCosSinSum[index].y*structureFactor.x);
                    cSim.pForce4[atom].x += 2 * recipCoeff * dEdR * kx;
                    cSim.pForce4[atom].y += 2 * recipCoeff * dEdR * ky;
                    cSim.pForce4[atom].z += 2 * recipCoeff * dEdR * kz;
                    lowrz = 1 - numRz;
                }
                lowry = 1 - numRy;
            }
157
        }
158
        atom += blockDim.x * gridDim.x;
159
160
    }
}