kCCMA.cu 8.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
28
29
30
31
32
33
34
35
36
 * -------------------------------------------------------------------------- */

#include <cuda.h>
#include <vector_functions.h>
#include <vector>
#include "gputypes.h"

using namespace std;


static __constant__ cudaGmxSimulation cSim;

37
void SetCCMASim(gpuContext gpu)
38
39
40
41
42
43
{
    cudaError_t status;
    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}

44
void GetCCMASim(gpuContext gpu)
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
    cudaError_t status;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}

/**
 * Synchronize all threads across all blocks.
 */
__device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
{
    __syncthreads();
    if (threadIdx.x == 0)
        syncCounter[blockIdx.x] = newCount;
    if (threadIdx.x < gridDim.x)
    {
        volatile short* counter = &syncCounter[threadIdx.x];
        do
        {
        } while (*counter != newCount);
    }
    __syncthreads();
}

Scott Le Grand's avatar
Scott Le Grand committed
69
70
71
72
73
74
75
76
77
__global__ void 
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
kApplyCCMA_kernel(float4* atomPositions, bool addOldPosition)
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
{
    // Initialize counters used for monitoring convergence and doing global thread synchronization.

    __shared__ unsigned int requiredIterations;
    if (threadIdx.x == 0)
    {
        requiredIterations = 0;
        cSim.pSyncCounter[gridDim.x+blockIdx.x] = -1;
        cSim.pSyncCounter[2*gridDim.x+blockIdx.x] = -1;
        cSim.pRequiredIterations[0] = 0;
    }

    // Calculate the direction of each constraint.

    unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
93
    while (pos < cSim.ccmaConstraints)
94
    {
95
96
        int2 atoms = cSim.pCcmaAtoms[pos];
        float4 dir = cSim.pCcmaDistance[pos];
97
98
99
100
101
        float4 oldPos1 = cSim.pOldPosq[atoms.x];
        float4 oldPos2 = cSim.pOldPosq[atoms.y];
        dir.x = oldPos1.x-oldPos2.x;
        dir.y = oldPos1.y-oldPos2.y;
        dir.z = oldPos1.z-oldPos2.z;
102
        cSim.pCcmaDistance[pos] = dir;
103
104
105
106
107
108
109
110
111
        pos += blockDim.x*gridDim.x;
    }
    __syncthreads();

    // Iteratively update the atom positions.

    unsigned int maxIterations = 150;
    float lowerTol = 1.0f-2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
    float upperTol = 1.0f+2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
112
    for (unsigned int iteration = 0; iteration < maxIterations; iteration++)
113
114
115
116
    {
        // Calculate the constraint force for each constraint.

        pos = threadIdx.x + blockIdx.x * blockDim.x;
117
        while (pos < cSim.ccmaConstraints)
118
        {
119
            int2 atoms = cSim.pCcmaAtoms[pos];
120
121
            float4 delta1 = atomPositions[atoms.x];
            float4 delta2 = atomPositions[atoms.y];
122
            float4 dir = cSim.pCcmaDistance[pos];
123
124
125
126
127
128
129
130
131
132
133
134
            float3 rp_ij = make_float3(delta1.x-delta2.x, delta1.y-delta2.y, delta1.z-delta2.z);
            if (addOldPosition)
            {
                rp_ij.x += dir.x;
                rp_ij.y += dir.y;
                rp_ij.z += dir.z;
            }
            float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
            float dist2 = dir.w*dir.w;
            float diff = dist2 - rp2;
            float rrpr  = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
            float d_ij2  = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
135
136
            float reducedMass = cSim.pCcmaReducedMass[pos];
            cSim.pCcmaDelta1[pos] = (rrpr > d_ij2*1e-6f ? reducedMass*diff/rrpr : 0.0f);
137
138
139
140
            if (requiredIterations == iteration && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2))
                requiredIterations = iteration+1;
            pos += blockDim.x * gridDim.x;
        }
141
        __syncthreads();
142
143
        if (threadIdx.x == 0 && requiredIterations > iteration)
            cSim.pRequiredIterations[0] = requiredIterations;
144
145
146
        kSyncAllThreads_kernel(cSim.pSyncCounter, iteration);
        if (iteration == cSim.pRequiredIterations[0])
            break; // All constraints have converged.
147

148
149
        // Multiply by the inverse constraint matrix.

150
        pos = threadIdx.x + blockIdx.x * blockDim.x;
151
        while (pos < cSim.ccmaConstraints)
152
        {
153
154
            float sum = 0.0f;
            for (unsigned int i = 0; ; i++)
155
            {
156
                unsigned int index = pos+i*cSim.ccmaConstraints;
157
                unsigned int column = cSim.pConstraintMatrixColumn[index];
158
                if (column >= cSim.ccmaConstraints)
159
                    break;
160
                sum += cSim.pCcmaDelta1[column]*cSim.pConstraintMatrixValue[index];
161
            }
162
            cSim.pCcmaDelta2[pos] = sum;
163
            pos += blockDim.x * gridDim.x;
164
        }
165
        kSyncAllThreads_kernel(&cSim.pSyncCounter[gridDim.x], iteration);
166
167
168
169

        // Update the position of each atom.

        pos = threadIdx.x + blockIdx.x * blockDim.x;
170
        float damping = (iteration < 2 ? 0.5f : 1.0f);
171
172
173
174
        while (pos < cSim.atoms)
        {
            float4 atomPos = atomPositions[pos];
            float invMass = cSim.pVelm4[pos].w;
175
            int num = cSim.pCcmaNumAtomConstraints[pos];
176
177
178
            for (int i = 0; i < num; i++)
            {
                int index = pos+i*cSim.atoms;
179
                int constraint = cSim.pCcmaAtomConstraints[index];
180
181
                bool forward = (constraint > 0);
                constraint = (forward ? constraint-1 : -constraint-1);
182
                float constraintForce = damping*invMass*cSim.pCcmaDelta2[constraint];
183
                constraintForce = (forward ? constraintForce : -constraintForce);
184
                float4 dir = cSim.pCcmaDistance[constraint];
185
186
187
188
189
190
191
                atomPos.x += constraintForce*dir.x;
                atomPos.y += constraintForce*dir.y;
                atomPos.z += constraintForce*dir.z;
            }
            atomPositions[pos] = atomPos;
            pos += blockDim.x*gridDim.x;
        }
192
193
        if (threadIdx.x == 0)
            requiredIterations = iteration+1;
194
195
196
197
198
199
200
201
202
        kSyncAllThreads_kernel(&cSim.pSyncCounter[2*gridDim.x], iteration);
    }

    // Reset the initial sync counter to be ready for the next call.

    if (threadIdx.x == 0)
        cSim.pSyncCounter[blockIdx.x] = -1;
}

203
void kApplyFirstCCMA(gpuContext gpu)
204
{
205
206
//    printf("kApplyFirstCCMA\n");
    if (gpu->sim.ccmaConstraints > 0)
207
    {
208
209
        kApplyCCMA_kernel<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>(gpu->sim.pPosqP, true);
        LAUNCHERROR("kApplyCCMA");
210
211
212
    }
}

213
void kApplySecondCCMA(gpuContext gpu)
214
{
215
216
//    printf("kApplySecondCCMA\n");
    if (gpu->sim.ccmaConstraints > 0)
217
    {
218
219
        kApplyCCMA_kernel<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>(gpu->sim.pPosq, false);
        LAUNCHERROR("kApplyCCMA");
220
221
    }
}