"...cuda-old/src/kernels/kCalculateCustomNonbondedForces.h" did not exist on "e04283fe8e7c06042528637c217a6bcc120632f5"
verlet.cc 4.8 KB
Newer Older
1
2
3
4
/**
 * Perform the first step of Verlet integration.
 */

5
6
7
8
9
10
KERNEL void integrateVerletPart1(int numAtoms, int paddedNumAtoms, GLOBAL const mixed2* RESTRICT dt, GLOBAL const real4* RESTRICT posq,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta
#ifdef USE_MIXED_PRECISION
        , GLOBAL const real4* RESTRICT posqCorrection
#endif
    ) {
11
12
13
    const mixed2 stepSize = dt[0];
    const mixed dtPos = stepSize.y;
    const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
14
    const mixed scale = dtVel/(mixed) 0x100000000;
15
    for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
16
        mixed4 velocity = velm[index];
17
        if (velocity.w != 0.0) {
18
19
20
21
22
#ifdef USE_MIXED_PRECISION
            real4 pos1 = posq[index];
            real4 pos2 = posqCorrection[index];
            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
23
            real4 pos = posq[index];
24
#endif
25
            velocity.x += scale*force[index]*velocity.w;
26
27
            velocity.y += scale*force[index+paddedNumAtoms]*velocity.w;
            velocity.z += scale*force[index+paddedNumAtoms*2]*velocity.w;
28
29
30
31
32
33
34
35
36
37
38
39
40
            pos.x = velocity.x*dtPos;
            pos.y = velocity.y*dtPos;
            pos.z = velocity.z*dtPos;
            posDelta[index] = pos;
            velm[index] = velocity;
        }
    }
}

/**
 * Perform the second step of Verlet integration.
 */

41
42
43
44
45
46
KERNEL void integrateVerletPart2(int numAtoms, GLOBAL mixed2* RESTRICT dt, GLOBAL real4* RESTRICT posq,
        GLOBAL mixed4* RESTRICT velm, GLOBAL const mixed4* RESTRICT posDelta
#ifdef USE_MIXED_PRECISION
        , GLOBAL real4* RESTRICT posqCorrection
#endif
    ) {
47
    mixed2 stepSize = dt[0];
48
#ifdef SUPPORTS_DOUBLE_PRECISION
49
    double oneOverDt = 1.0/stepSize.y;
50
51
52
53
#else
    float oneOverDt = 1.0f/stepSize.y;
    float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
#endif
54
    if (GLOBAL_ID == 0)
55
        dt[0].x = stepSize.y;
56
57
58
    SYNC_THREADS;
    int index = GLOBAL_ID;
    for (; index < numAtoms; index += GLOBAL_SIZE) {
59
        mixed4 velocity = velm[index];
60
        if (velocity.w != 0.0) {
61
62
63
64
65
#ifdef USE_MIXED_PRECISION
            real4 pos1 = posq[index];
            real4 pos2 = posqCorrection[index];
            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
66
            real4 pos = posq[index];
67
68
#endif
            mixed4 delta = posDelta[index];
69
70
71
            pos.x += delta.x;
            pos.y += delta.y;
            pos.z += delta.z;
72
#ifdef SUPPORTS_DOUBLE_PRECISION
73
            velocity = make_mixed4((mixed) (delta.x*oneOverDt), (mixed) (delta.y*oneOverDt), (mixed) (delta.z*oneOverDt), velocity.w);
74
75
76
#else
            velocity = make_mixed4((mixed) (delta.x*oneOverDt+delta.x*correction), (mixed) (delta.y*oneOverDt+delta.y*correction), (mixed) (delta.z*oneOverDt+delta.z*correction), velocity.w);
#endif
77
78
79
80
#ifdef USE_MIXED_PRECISION
            posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
            posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
81
            posq[index] = pos;
82
#endif
83
84
85
86
87
88
89
90
            velm[index] = velocity;
        }
    }
}

/**
 * Select the step size to use for the next step.
 */
91

92
KERNEL void selectVerletStepSize(int numAtoms, int paddedNumAtoms, mixed maxStepSize, mixed errorTol, GLOBAL mixed2* RESTRICT dt, GLOBAL const mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force) {
93
94
    // Calculate the error.

95
96
    LOCAL mixed error[256];
    mixed err = 0;
97
    const mixed scale = RECIP((mixed) 0x100000000);
98
    for (int index = LOCAL_ID; index < numAtoms; index += LOCAL_SIZE) {
99
        mixed3 f = make_mixed3(scale*force[index], scale*force[index+paddedNumAtoms], scale*force[index+paddedNumAtoms*2]);
100
        mixed invMass = velm[index].w;
101
        err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass*invMass;
102
    }
103
104
    error[LOCAL_ID] = err;
    SYNC_THREADS;
105
106
107

    // Sum the errors from all threads.

108
109
110
111
    for (unsigned int offset = 1; offset < LOCAL_SIZE; offset *= 2) {
        if (LOCAL_ID+offset < LOCAL_SIZE && (LOCAL_ID&(2*offset-1)) == 0)
            error[LOCAL_ID] += error[LOCAL_ID+offset];
        SYNC_THREADS;
112
    }
113
    if (LOCAL_ID == 0) {
114
        mixed totalError = SQRT(error[0]/(numAtoms*3));
115
        mixed newStepSize = SQRT(errorTol/totalError);
116
        mixed oldStepSize = dt[0].y;
117
118
119
120
121
122
123
124
125
        if (oldStepSize > 0.0f)
            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
            newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
        if (newStepSize > maxStepSize)
            newStepSize = maxStepSize;
        dt[0].y = newStepSize;
    }
}