customIntegratorPerDof.cu 2.31 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
 * Load the position of a particle.
 */
inline __device__ mixed4 loadPos(const real4* __restrict__ posq, const real4* __restrict__ posqCorrection, int index) {
#ifdef USE_MIXED_PRECISION
    real4 pos1 = posq[index];
    real4 pos2 = posqCorrection[index];
    return make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
    return posq[index];
#endif
}

/**
 * Store the position of a particle.
 */
inline __device__ void storePos(real4* __restrict__ posq, real4* __restrict__ posqCorrection, int index, mixed4 pos) {
#ifdef USE_MIXED_PRECISION
    posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
    posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
    posq[index] = pos;
#endif
}

inline __device__ double4 convertToDouble4(mixed4 a) {
27
28
29
    return make_double4(a.x, a.y, a.z, a.w);
}

30
31
inline __device__ mixed4 convertFromDouble4(double4 a) {
    return make_mixed4(a.x, a.y, a.z, a.w);
32
33
}

34
35
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
        mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
36
37
        mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
        const real energy, mixed* __restrict__ energyParamDerivs
38
        PARAMETER_ARGUMENTS) {
39
    mixed stepSize = dt[0].y;
40
41
42
43
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    const double forceScale = 1.0/0xFFFFFFFF;
    while (index < NUM_ATOMS) {
#ifdef LOAD_POS_AS_DELTA
44
        double4 position = convertToDouble4(loadPos(posq, posqCorrection, index)+posDelta[index]);
45
#else
46
        double4 position = convertToDouble4(loadPos(posq, posqCorrection, index));
47
48
49
50
51
#endif
        double4 velocity = convertToDouble4(velm[index]);
        double4 f = make_double4(forceScale*force[index], forceScale*force[index+PADDED_NUM_ATOMS], forceScale*force[index+PADDED_NUM_ATOMS*2], 0.0);
        double mass = 1.0/velocity.w;
        if (velocity.w != 0.0) {
52
53
            int gaussianIndex = gaussianBaseIndex;
            int uniformIndex = 0;
54
55
56
57
58
            COMPUTE_STEP
        }
        index += blockDim.x*gridDim.x;
    }
}