qtb.cc 10.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/**
 * Perform the first part of integration: velocity step.
 */
KERNEL void integrateQTBPart1(int numAtoms, int paddedNumAtoms, mixed dt, int stepIndex, GLOBAL mixed4* RESTRICT velm,
        GLOBAL const mm_long* RESTRICT force, GLOBAL mixed* RESTRICT segmentVelocity, GLOBAL const int* RESTRICT atomOrder) {
    mixed fscale = dt/(mixed) 0x100000000;
    for (int atom = GLOBAL_ID; atom < numAtoms; atom += GLOBAL_SIZE) {
        int atomIndex = atomOrder[atom];
        mixed4 velocity = velm[atom];
        segmentVelocity[3*numAtoms*stepIndex + atomIndex] = velocity.x;
        segmentVelocity[3*numAtoms*stepIndex + numAtoms + atomIndex] = velocity.y;
        segmentVelocity[3*numAtoms*stepIndex + 2*numAtoms + atomIndex] = velocity.z;
        if (velocity.w != 0.0) {
            velocity.x += fscale*velocity.w*force[atom];
            velocity.y += fscale*velocity.w*force[atom+paddedNumAtoms];
            velocity.z += fscale*velocity.w*force[atom+paddedNumAtoms*2];
            velm[atom] = velocity;
        }
    }
}

/**
 * Perform the second part of integration: position half step, then interact with heat bath,
 * then another position half step.
 */
KERNEL void integrateQTBPart2(int numAtoms, mixed dt, mixed friction, int stepIndex, GLOBAL mixed4* RESTRICT velm,
        GLOBAL mixed4* RESTRICT posDelta, GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed* RESTRICT randomForce,
        GLOBAL const int* RESTRICT atomOrder) {
    mixed halfdt = 0.5f*dt;
    mixed vscale = EXP(-dt*friction);
    for (int atom = GLOBAL_ID; atom < numAtoms; atom += GLOBAL_SIZE) {
        int atomIndex = atomOrder[atom];
        mixed4 velocity = velm[atom];
        if (velocity.w != 0.0) {
            mixed4 delta = make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0);
            mixed fscale = dt*velocity.w;
            velocity.x = vscale*velocity.x + fscale*randomForce[3*numAtoms*stepIndex + atomIndex];
            velocity.y = vscale*velocity.y + fscale*randomForce[3*numAtoms*stepIndex + numAtoms + atomIndex];
            velocity.z = vscale*velocity.z + fscale*randomForce[3*numAtoms*stepIndex + 2*numAtoms + atomIndex];
            velm[atom] = velocity;
            delta += make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0);
            posDelta[atom] = delta;
            oldDelta[atom] = delta;
        }
    }
}

/**
 * Perform the third part of integration: apply constraint forces to velocities, then record
 * the constrained positions.
 */
KERNEL void integrateQTBPart3(int numAtoms, mixed dt, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm,
         GLOBAL const mixed4* RESTRICT posDelta, GLOBAL const mixed4* RESTRICT oldDelta
#ifdef USE_MIXED_PRECISION
        , GLOBAL real4* RESTRICT posqCorrection
#endif
        ) {
    mixed invDt = 1/dt;
    for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
            mixed4 delta = posDelta[index];
            velocity.x += (delta.x-oldDelta[index].x)*invDt;
            velocity.y += (delta.y-oldDelta[index].y)*invDt;
            velocity.z += (delta.z-oldDelta[index].z)*invDt;
            velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
            real4 pos1 = posq[index];
            real4 pos2 = posqCorrection[index];
            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
            real4 pos = posq[index];
#endif
            pos.x += delta.x;
            pos.y += delta.y;
            pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
            posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
            posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
            posq[index] = pos;
#endif
        }
    }
}

/**
 * Update the buffer of white noise.
 */
KERNEL void generateNoise(int numAtoms, int segmentLength, GLOBAL float* RESTRICT noise, GLOBAL const float* RESTRICT random, unsigned int randomIndex) {
    randomIndex = 4*randomIndex; // Interpret it as float instead of float4
    int fftLength = 3*segmentLength;
    for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) {
        // Copy segment 2 over to segment 1

        for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
            noise[i*fftLength+j] = noise[i*fftLength+j+segmentLength];
        SYNC_THREADS

        // Copy segment 3 over to segment 2

        for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
            noise[i*fftLength+j+segmentLength] = noise[i*fftLength+j+2*segmentLength];
        SYNC_THREADS
 
        // Fill segment 3 with new random values.

        for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
            noise[i*fftLength+j+2*segmentLength] = random[randomIndex+i*segmentLength+j];
        SYNC_THREADS
    }
}

DEVICE mixed2 multiplyComplex(mixed2 c1, mixed2 c2) {
    return make_mixed2(c1.x*c2.x-c1.y*c2.y, c1.x*c2.y+c1.y*c2.x);
}

DEVICE mixed2 multiplyComplexConj(mixed2 c1, mixed2 c2) {
    return make_mixed2(c1.x*c2.x+c1.y*c2.y, c1.x*c2.y-c1.y*c2.x);
}

/**
 * Generate the random force for the next segment.
 */
KERNEL void generateRandomForce(int numAtoms, int segmentLength, mixed dt, mixed friction, GLOBAL float* RESTRICT noise,
        GLOBAL mixed* RESTRICT randomForce, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed* RESTRICT thetad,
        GLOBAL mixed* RESTRICT cutoffFunction, GLOBAL int* RESTRICT particleType, GLOBAL mixed* RESTRICT adaptedFriction,
        GLOBAL mixed2* RESTRICT workspace) {
    const int fftLength = 3*segmentLength;
    const int numFreq = (fftLength+1)/2;
    GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*fftLength];
    GLOBAL mixed2* data1 = &data0[fftLength];
    GLOBAL mixed2* w = &data1[fftLength];
    for (int i = LOCAL_ID; i < fftLength; i += LOCAL_SIZE)
        w[i] = make_mixed2(cos(-i*2*M_PI/fftLength), sin(-i*2*M_PI/fftLength));
    for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) {
        int atom = i/3;
        int axis = i%3;
        mixed invMass = velm[atom].w;
        int type = particleType[atom];
        if (invMass != 0) {
            for (int j = LOCAL_ID; j < fftLength; j += LOCAL_SIZE)
                data0[j] = make_mixed2(noise[i*fftLength+j], 0);
            SYNC_THREADS
            FFT_FORWARD
            for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) {
                mixed f = M_PI*j/(numFreq*dt);
                mixed gamma = adaptedFriction[type*numFreq+j];
                mixed cw = (1 - 2*EXP(-dt*friction)*COS(f*dt) + EXP(-2*friction*dt)) / ((friction*friction+f*f)*dt*dt);
                mixed2 d = RECIP_DATA[j] * SQRT(cutoffFunction[j]*thetad[j]*cw*gamma/friction);
                RECIP_DATA[j] = d;
                if (j != 0 && j*2 != fftLength)
                    RECIP_DATA[fftLength-j] = make_mixed2(d.x, -d.y);
            }
            SYNC_THREADS
            FFT_BACKWARD
            const mixed scale = SQRT(2*friction/(dt*invMass))/fftLength;
            for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
                randomForce[numAtoms*(3*j+axis)+atom] = scale*data0[segmentLength+j].x;
            SYNC_THREADS
        }
    }
}

/**
 * Update the friction rates used for generating noise, part 1: compute the error in the
 * fluctuation dissipation theorem.
 */
KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mixed4* RESTRICT velm, GLOBAL const int* RESTRICT particleType,
        GLOBAL const mixed* RESTRICT randomForce, GLOBAL const mixed* RESTRICT segmentVelocity, GLOBAL const mixed* RESTRICT adaptedFriction,
        GLOBAL mm_ulong* RESTRICT dfdt, GLOBAL mixed2* RESTRICT workspace) {
Peter Eastman's avatar
Peter Eastman committed
172
173
174
175
176
177
178
    const int fftLength = 3*segmentLength;
    const int numFreq = (fftLength+1)/2;
    GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*fftLength];
    GLOBAL mixed2* data1 = &data0[fftLength];
    GLOBAL mixed2* w = &data1[fftLength];
    for (int i = LOCAL_ID; i < fftLength; i += LOCAL_SIZE)
        w[i] = make_mixed2(cos(-i*2*M_PI/fftLength), sin(-i*2*M_PI/fftLength));
179
180
181
182
183
184
185
186
187
    for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) {
        int atom = i/3;
        int axis = i%3;
        int type = particleType[atom];
        mixed invMass = velm[atom].w;
        if (invMass != 0) {
            // Pack the velocities and forces together so we can transform both at once.
            for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
                data0[j] = make_mixed2(segmentVelocity[numAtoms*(3*j+axis)+atom], randomForce[numAtoms*(3*j+axis)+atom]);
Peter Eastman's avatar
Peter Eastman committed
188
189
            for (int j = segmentLength+LOCAL_ID; j < fftLength; j += LOCAL_SIZE)
                data0[j] = make_mixed2(0);
190
191
192
193
            SYNC_THREADS
            ADAPTATION_FFT
            for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) {
                mixed2 d1 = ADAPTATION_RECIP[j];
Peter Eastman's avatar
Peter Eastman committed
194
                mixed2 d2 = (j == 0 ? ADAPTATION_RECIP[0] : ADAPTATION_RECIP[fftLength-j]);
195
196
197
198
199
200
201
                mixed2 v = 0.5f*make_mixed2(d1.x+d2.x, d1.y-d2.y);
                mixed2 f = 0.5f*make_mixed2(d1.y+d2.y, -d1.x+d2.x);
                mixed cvv = v.x*v.x + v.y*v.y;
                mixed2 cvf = multiplyComplexConj(f, v);
                mixed dfdtinc = adaptedFriction[type*numFreq+j]*cvv/invMass - cvf.x;
                ATOMIC_ADD(&dfdt[type*numFreq+j], (mm_ulong) realToFixedPoint(dfdtinc));
            }
Peter Eastman's avatar
Peter Eastman committed
202
            SYNC_THREADS
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
        }
    }
}

/**
 * Update the friction rates used for generating noise, part 2: update the friction based
 * on the error.
 */
KERNEL void adaptFrictionPart2(int numTypes, int segmentLength, mixed dt, GLOBAL const int* RESTRICT typeParticleCount,
        GLOBAL const float* RESTRICT typeAdaptationRate, GLOBAL mixed* RESTRICT adaptedFriction, GLOBAL const mm_long* RESTRICT dfdt) {
    int numFreq = (3*segmentLength+1)/2;
    for (int type = GROUP_ID; type < numTypes; type += NUM_GROUPS) {
        mixed scale = dt*typeAdaptationRate[type]/(3*typeParticleCount[type]*segmentLength)/(mixed) 0x100000000;
        for (int i = LOCAL_ID; i < numFreq; i += LOCAL_SIZE) {
            mixed delta = -scale*dfdt[type*numFreq+i];
            adaptedFriction[type*numFreq+i] = max((mixed) 0, adaptedFriction[type*numFreq+i]+delta);
        }
    }
}