kCCMA.cu 8.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
28
29
30
31
32
33
34
35
36
 * -------------------------------------------------------------------------- */

#include <cuda.h>
#include <vector_functions.h>
#include <vector>
#include "gputypes.h"

using namespace std;


static __constant__ cudaGmxSimulation cSim;

37
void SetCCMASim(gpuContext gpu)
38
39
40
41
42
43
{
    cudaError_t status;
    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}

44
void GetCCMASim(gpuContext gpu)
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
    cudaError_t status;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}

/**
 * Synchronize all threads across all blocks.
 */
__device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
{
    __syncthreads();
    if (threadIdx.x == 0)
        syncCounter[blockIdx.x] = newCount;
    if (threadIdx.x < gridDim.x)
    {
        volatile short* counter = &syncCounter[threadIdx.x];
        do
        {
        } while (*counter != newCount);
    }
    __syncthreads();
}

69
__global__ void kApplyCCMA_kernel(float4* atomPositions, bool addOldPosition)
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
{
    // Initialize counters used for monitoring convergence and doing global thread synchronization.

    __shared__ unsigned int requiredIterations;
    if (threadIdx.x == 0)
    {
        requiredIterations = 0;
        cSim.pSyncCounter[gridDim.x+blockIdx.x] = -1;
        cSim.pSyncCounter[2*gridDim.x+blockIdx.x] = -1;
        cSim.pRequiredIterations[0] = 0;
    }

    // Calculate the direction of each constraint.

    unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
85
    while (pos < cSim.ccmaConstraints)
86
    {
87
88
        int2 atoms = cSim.pCcmaAtoms[pos];
        float4 dir = cSim.pCcmaDistance[pos];
89
90
91
92
93
        float4 oldPos1 = cSim.pOldPosq[atoms.x];
        float4 oldPos2 = cSim.pOldPosq[atoms.y];
        dir.x = oldPos1.x-oldPos2.x;
        dir.y = oldPos1.y-oldPos2.y;
        dir.z = oldPos1.z-oldPos2.z;
94
        cSim.pCcmaDistance[pos] = dir;
95
96
97
98
99
100
101
102
103
        pos += blockDim.x*gridDim.x;
    }
    __syncthreads();

    // Iteratively update the atom positions.

    unsigned int maxIterations = 150;
    float lowerTol = 1.0f-2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
    float upperTol = 1.0f+2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
104
    for (unsigned int iteration = 0; iteration < maxIterations; iteration++)
105
106
107
108
    {
        // Calculate the constraint force for each constraint.

        pos = threadIdx.x + blockIdx.x * blockDim.x;
109
        while (pos < cSim.ccmaConstraints)
110
        {
111
            int2 atoms = cSim.pCcmaAtoms[pos];
112
113
            float4 delta1 = atomPositions[atoms.x];
            float4 delta2 = atomPositions[atoms.y];
114
            float4 dir = cSim.pCcmaDistance[pos];
115
116
117
118
119
120
121
122
123
124
125
126
            float3 rp_ij = make_float3(delta1.x-delta2.x, delta1.y-delta2.y, delta1.z-delta2.z);
            if (addOldPosition)
            {
                rp_ij.x += dir.x;
                rp_ij.y += dir.y;
                rp_ij.z += dir.z;
            }
            float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
            float dist2 = dir.w*dir.w;
            float diff = dist2 - rp2;
            float rrpr  = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
            float d_ij2  = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
127
128
            float reducedMass = cSim.pCcmaReducedMass[pos];
            cSim.pCcmaDelta1[pos] = (rrpr > d_ij2*1e-6f ? reducedMass*diff/rrpr : 0.0f);
129
130
131
132
            if (requiredIterations == iteration && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2))
                requiredIterations = iteration+1;
            pos += blockDim.x * gridDim.x;
        }
133
        __syncthreads();
134
135
        if (threadIdx.x == 0 && requiredIterations > iteration)
            cSim.pRequiredIterations[0] = requiredIterations;
136
137
138
        kSyncAllThreads_kernel(cSim.pSyncCounter, iteration);
        if (iteration == cSim.pRequiredIterations[0])
            break; // All constraints have converged.
139

140
141
        // Multiply by the inverse constraint matrix.

142
        pos = threadIdx.x + blockIdx.x * blockDim.x;
143
        while (pos < cSim.ccmaConstraints)
144
        {
145
146
            float sum = 0.0f;
            for (unsigned int i = 0; ; i++)
147
            {
148
                unsigned int index = pos+i*cSim.ccmaConstraints;
149
                unsigned int column = cSim.pConstraintMatrixColumn[index];
150
                if (column >= cSim.ccmaConstraints)
151
                    break;
152
                sum += cSim.pCcmaDelta1[column]*cSim.pConstraintMatrixValue[index];
153
            }
154
            cSim.pCcmaDelta2[pos] = sum;
155
            pos += blockDim.x * gridDim.x;
156
        }
157
        kSyncAllThreads_kernel(&cSim.pSyncCounter[gridDim.x], iteration);
158
159
160
161

        // Update the position of each atom.

        pos = threadIdx.x + blockIdx.x * blockDim.x;
162
        float damping = (iteration < 2 ? 0.5f : 1.0f);
163
164
165
166
        while (pos < cSim.atoms)
        {
            float4 atomPos = atomPositions[pos];
            float invMass = cSim.pVelm4[pos].w;
167
            int num = cSim.pCcmaNumAtomConstraints[pos];
168
169
170
            for (int i = 0; i < num; i++)
            {
                int index = pos+i*cSim.atoms;
171
                int constraint = cSim.pCcmaAtomConstraints[index];
172
173
                bool forward = (constraint > 0);
                constraint = (forward ? constraint-1 : -constraint-1);
174
                float constraintForce = damping*invMass*cSim.pCcmaDelta2[constraint];
175
                constraintForce = (forward ? constraintForce : -constraintForce);
176
                float4 dir = cSim.pCcmaDistance[constraint];
177
178
179
180
181
182
183
                atomPos.x += constraintForce*dir.x;
                atomPos.y += constraintForce*dir.y;
                atomPos.z += constraintForce*dir.z;
            }
            atomPositions[pos] = atomPos;
            pos += blockDim.x*gridDim.x;
        }
184
185
        if (threadIdx.x == 0)
            requiredIterations = iteration+1;
186
187
188
189
190
191
192
193
194
        kSyncAllThreads_kernel(&cSim.pSyncCounter[2*gridDim.x], iteration);
    }

    // Reset the initial sync counter to be ready for the next call.

    if (threadIdx.x == 0)
        cSim.pSyncCounter[blockIdx.x] = -1;
}

195
void kApplyFirstCCMA(gpuContext gpu)
196
{
197
198
//    printf("kApplyFirstCCMA\n");
    if (gpu->sim.ccmaConstraints > 0)
199
    {
200
201
        kApplyCCMA_kernel<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>(gpu->sim.pPosqP, true);
        LAUNCHERROR("kApplyCCMA");
202
203
204
    }
}

205
void kApplySecondCCMA(gpuContext gpu)
206
{
207
208
//    printf("kApplySecondCCMA\n");
    if (gpu->sim.ccmaConstraints > 0)
209
    {
210
211
        kApplyCCMA_kernel<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>(gpu->sim.pPosq, false);
        LAUNCHERROR("kApplyCCMA");
212
213
    }
}