kCCMA.cu 8.24 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
/* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
 * Portions copyright (c) 2009 Stanford University and the Authors.           *
 * Authors: Scott Le Grand, Peter Eastman                                     *
 * Contributors:                                                              *
 *                                                                            *
13
14
15
16
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
17
 *                                                                            *
18
19
20
21
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
22
 *                                                                            *
23
24
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
25
26
27
28
29
30
31
32
33
34
35
36
 * -------------------------------------------------------------------------- */

#include <cuda.h>
#include <vector_functions.h>
#include <vector>
#include "gputypes.h"

using namespace std;


static __constant__ cudaGmxSimulation cSim;

37
void SetCCMASim(gpuContext gpu)
38
39
40
41
42
43
{
    cudaError_t status;
    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
}

44
void GetCCMASim(gpuContext gpu)
45
46
47
48
49
50
{
    cudaError_t status;
    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}

51
__global__ void
Scott Le Grand's avatar
Scott Le Grand committed
52
53
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
54
#elif (__CUDA_ARCH__ >= 120)
Scott Le Grand's avatar
Scott Le Grand committed
55
56
57
58
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
59
kComputeCCMAConstraintDirections()
60
61
62
{
    // Calculate the direction of each constraint.

63
    for (unsigned int index = threadIdx.x+blockIdx.x*blockDim.x; index < cSim.ccmaConstraints; index += blockDim.x*gridDim.x)
64
    {
65
66
        int2 atoms = cSim.pCcmaAtoms[index];
        float4 dir = cSim.pCcmaDistance[index];
67
68
69
70
71
        float4 oldPos1 = cSim.pOldPosq[atoms.x];
        float4 oldPos2 = cSim.pOldPosq[atoms.y];
        dir.x = oldPos1.x-oldPos2.x;
        dir.y = oldPos1.y-oldPos2.y;
        dir.z = oldPos1.z-oldPos2.z;
72
        cSim.pCcmaDistance[index] = dir;
73
    }
74
75
76
77
78
}

__global__ void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
79
#elif (__CUDA_ARCH__ >= 120)
80
81
82
83
84
85
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
kComputeCCMAConstraintForces(float4* atomPositions, bool addOldPosition)
{
86
    __shared__ int converged;
87
88
    float lowerTol = 1.0f-2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
    float upperTol = 1.0f+2.0f*cSim.shakeTolerance+cSim.shakeTolerance*cSim.shakeTolerance;
89
90
91
    if (threadIdx.x == 0)
        converged = 1;
    __syncthreads();
92

93
94
95
96
97
98
99
100
101
102
    // Calculate the constraint force for each constraint.

    for (unsigned int index = threadIdx.x+blockIdx.x*blockDim.x; index < cSim.ccmaConstraints; index += blockDim.x*gridDim.x)
    {
        int2 atoms = cSim.pCcmaAtoms[index];
        float4 delta1 = atomPositions[atoms.x];
        float4 delta2 = atomPositions[atoms.y];
        float4 dir = cSim.pCcmaDistance[index];
        float3 rp_ij = make_float3(delta1.x-delta2.x, delta1.y-delta2.y, delta1.z-delta2.z);
        if (addOldPosition)
103
        {
104
105
106
            rp_ij.x += dir.x;
            rp_ij.y += dir.y;
            rp_ij.z += dir.z;
107
        }
108
109
110
111
112
113
114
115
116
117
        float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
        float dist2 = dir.w*dir.w;
        float diff = dist2 - rp2;
        float rrpr  = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
        float d_ij2  = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
        float reducedMass = cSim.pCcmaReducedMass[index];
        cSim.pCcmaDelta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass*diff/rrpr : 0.0f);

        // See whether it has converged.

118
119
120
121
122
        if (converged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2))
        {
            converged = 0;
            *cSim.ccmaConvergedDeviceMarker = 0;
        }
123
124
    }
}
125

126
127
128
__global__ void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
129
#elif (__CUDA_ARCH__ >= 120)
130
131
132
133
134
135
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
kMultiplyByCCMAConstraintMatrix()
{
136
137
    if (*cSim.ccmaConvergedDeviceMarker)
        return; // The constraint iteration has already converged
138

139
    // Multiply by the inverse constraint matrix.
140

141
142
143
144
    for (unsigned int index = threadIdx.x+blockIdx.x*blockDim.x; index < cSim.ccmaConstraints; index += blockDim.x*gridDim.x)
    {
        float sum = 0.0f;
        for (unsigned int i = 0; ; i++)
145
        {
146
147
148
149
150
            unsigned int element = index+i*cSim.ccmaConstraints;
            unsigned int column = cSim.pConstraintMatrixColumn[element];
            if (column >= cSim.ccmaConstraints)
                break;
            sum += cSim.pCcmaDelta1[column]*cSim.pConstraintMatrixValue[element];
151
        }
152
        cSim.pCcmaDelta2[index] = sum;
153
    }
154
}
155

156
157
158
__global__ void
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
159
#elif (__CUDA_ARCH__ >= 120)
160
161
162
163
164
165
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
kUpdateCCMAAtomPositions(float4* atomPositions, int iteration)
{
166
    if (*cSim.ccmaConvergedDeviceMarker)
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
        return; // The constraint iteration has already converged.
    float damping = (iteration < 2 ? 0.5f : 1.0f);
    for (unsigned int index = threadIdx.x+blockIdx.x*blockDim.x; index < cSim.atoms; index += blockDim.x*gridDim.x)
    {
        float4 atomPos = atomPositions[index];
        float invMass = cSim.pVelm4[index].w;
        int num = cSim.pCcmaNumAtomConstraints[index];
        for (int i = 0; i < num; i++)
        {
            int constraint = cSim.pCcmaAtomConstraints[index+i*cSim.atoms];
            bool forward = (constraint > 0);
            constraint = (forward ? constraint-1 : -constraint-1);
            float constraintForce = damping*invMass*cSim.pCcmaDelta2[constraint];
            constraintForce = (forward ? constraintForce : -constraintForce);
            float4 dir = cSim.pCcmaDistance[constraint];
            atomPos.x += constraintForce*dir.x;
            atomPos.y += constraintForce*dir.y;
            atomPos.z += constraintForce*dir.z;
        }
        atomPositions[index] = atomPos;
    }
}
189

190
191
192
193
void kApplyCCMA(gpuContext gpu, float4* posq, bool addOldPosition)
{
    kComputeCCMAConstraintDirections<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>();
    LAUNCHERROR("kComputeCCMAConstraintDirections");
194
    const int checkInterval = 3;
195
    for (int i = 0; i < 150; i++) {
196
197
        if ((i+1)%checkInterval == 0)
            *gpu->ccmaConvergedHostMarker = 1;
198
        kComputeCCMAConstraintForces<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block, gpu->sim.ccma_threads_per_block*sizeof(int)>>>(posq, addOldPosition);
199
        cudaEventRecord(gpu->ccmaEvent, 0);
200
        kMultiplyByCCMAConstraintMatrix<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block, gpu->sim.ccma_threads_per_block*sizeof(int)>>>();
201
202
203
        kUpdateCCMAAtomPositions<<<gpu->sim.blocks, gpu->sim.ccma_threads_per_block>>>(posq, 3*i+2);
        cudaEventSynchronize(gpu->ccmaEvent);
        if ((i+1)%checkInterval == 0 && *gpu->ccmaConvergedHostMarker)
204
205
            break;
    }
206
207
}

208
void kApplyCCMA(gpuContext gpu)
209
{
210
//    printf("kApplyCCMA\n");
211
    if (gpu->sim.ccmaConstraints > 0)
212
        kApplyCCMA(gpu, gpu->sim.pPosqP, true);
213
}