langevin.cu 5.45 KB
Newer Older
1
2
3
4
5
6
enum {VelScale, ForceScale, NoiseScale, MaxParams};

/**
 * Perform the first step of Langevin integration.
 */

7
extern "C" __global__ void integrateLangevinPart1(int numAtoms, int paddedNumAtoms, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
8
9
        const mixed* __restrict__ paramBuffer, const mixed2* __restrict__ dt, const float4* __restrict__ random, unsigned int randomIndex) {
    mixed vscale = paramBuffer[VelScale];
10
    mixed fscale = paramBuffer[ForceScale]/(mixed) 0x100000000;
11
12
    mixed noisescale = paramBuffer[NoiseScale];
    mixed stepSize = dt[0].y;
13
14
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    randomIndex += index;
15
    while (index < numAtoms) {
16
        mixed4 velocity = velm[index];
17
        if (velocity.w != 0) {
18
            mixed sqrtInvMass = SQRT(velocity.w);
19
            velocity.x = vscale*velocity.x + fscale*velocity.w*force[index] + noisescale*sqrtInvMass*random[randomIndex].x;
20
21
            velocity.y = vscale*velocity.y + fscale*velocity.w*force[index+paddedNumAtoms] + noisescale*sqrtInvMass*random[randomIndex].y;
            velocity.z = vscale*velocity.z + fscale*velocity.w*force[index+paddedNumAtoms*2] + noisescale*sqrtInvMass*random[randomIndex].z;
22
            velm[index] = velocity;
23
            posDelta[index] = make_mixed4(stepSize*velocity.x, stepSize*velocity.y, stepSize*velocity.z, 0);
24
25
26
27
28
29
30
31
32
33
        }
        randomIndex += blockDim.x*gridDim.x;
        index += blockDim.x*gridDim.x;
    }
}

/**
 * Perform the second step of Langevin integration.
 */

34
extern "C" __global__ void integrateLangevinPart2(int numAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) {
35
#if __CUDA_ARCH__ >= 130
36
    double invStepSize = 1.0/dt[0].y;
37
38
39
40
#else
    float invStepSize = 1.0f/dt[0].y;
    float correction = (1.0f-invStepSize*dt[0].y)/dt[0].y;
#endif
41
    int index = blockIdx.x*blockDim.x+threadIdx.x;
42
    while (index < numAtoms) {
43
        mixed4 vel = velm[index];
44
        if (vel.w != 0) {
45
46
47
48
49
#ifdef USE_MIXED_PRECISION
            real4 pos1 = posq[index];
            real4 pos2 = posqCorrection[index];
            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
50
            real4 pos = posq[index];
51
52
#endif
            mixed4 delta = posDelta[index];
53
54
55
            pos.x += delta.x;
            pos.y += delta.y;
            pos.z += delta.z;
56
#if __CUDA_ARCH__ >= 130
57
58
59
            vel.x = (mixed) (invStepSize*delta.x);
            vel.y = (mixed) (invStepSize*delta.y);
            vel.z = (mixed) (invStepSize*delta.z);
60
61
62
63
64
#else
            vel.x = invStepSize*delta.x + correction*delta.x;
            vel.y = invStepSize*delta.y + correction*delta.x;
            vel.z = invStepSize*delta.z + correction*delta.x;
#endif
65
66
67
68
#ifdef USE_MIXED_PRECISION
            posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
            posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
69
            posq[index] = pos;
70
#endif
71
72
73
74
75
76
77
78
79
80
            velm[index] = vel;
        }
        index += blockDim.x*gridDim.x;
    }
}

/**
 * Select the step size to use for the next step.
 */

81
extern "C" __global__ void selectLangevinStepSize(int numAtoms, int paddedNumAtoms, mixed maxStepSize, mixed errorTol, mixed friction, mixed kT, mixed2* __restrict__ dt,
82
        const mixed4* __restrict__ velm, const long long* __restrict__ force, mixed* __restrict__ paramBuffer) {
83
84
    // Calculate the error.

85
86
87
    extern __shared__ mixed params[];
    mixed* error = &params[MaxParams];
    mixed err = 0;
88
    unsigned int index = threadIdx.x;
89
    const mixed scale = RECIP((mixed) 0x100000000);
90
91
    while (index < numAtoms) {
        mixed3 f = make_mixed3(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2]);
92
        mixed invMass = velm[index].w;
93
        err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass*invMass;
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
        index += blockDim.x*gridDim.x;
    }
    error[threadIdx.x] = err;
    __syncthreads();

    // Sum the errors from all threads.

    for (unsigned int offset = 1; offset < blockDim.x; offset *= 2) {
        if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
            error[threadIdx.x] += error[threadIdx.x+offset];
        __syncthreads();
    }
    if (blockIdx.x*blockDim.x+threadIdx.x == 0) {
        // Select the new step size.

109
        mixed totalError = SQRT(error[0]/(numAtoms*3));
110
        mixed newStepSize = SQRT(errorTol/totalError);
111
        mixed oldStepSize = dt[0].y;
112
113
114
115
116
117
118
119
120
121
        if (oldStepSize > 0.0f)
            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
            newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
        if (newStepSize > maxStepSize)
            newStepSize = maxStepSize;
        dt[0].y = newStepSize;

        // Recalculate the integration parameters.

Peter Eastman's avatar
Peter Eastman committed
122
        mixed vscale = exp(-newStepSize*friction);
123
        mixed fscale = (friction == 0 ? newStepSize : (1-vscale)/friction);
Peter Eastman's avatar
Peter Eastman committed
124
        mixed noisescale = sqrt(kT*(1-vscale*vscale));
125
126
127
128
129
130
131
132
        params[VelScale] = vscale;
        params[ForceScale] = fscale;
        params[NoiseScale] = noisescale;
    }
    __syncthreads();
    if (threadIdx.x < MaxParams)
        paramBuffer[threadIdx.x] = params[threadIdx.x];
}