Improved how the constraint matrices for C-SHAKE get built

27ff4f1b · Peter Eastman · 043c7b6c · 27ff4f1b · 27ff4f1b · 27ff4f1b
Commit 27ff4f1b authored May 15, 2009 by Peter Eastman
3 changed files
--- a/platforms/cuda/src/kernels/gpu.cpp
+++ b/platforms/cuda/src/kernels/gpu.cpp
@@ -48,8 +48,11 @@ using namespace std;
 #include "cudaKernels.h"
 #include "hilbert.h"
 #include "openmm/OpenMMException.h"
+#include "jama_svd.h"
 using OpenMM::OpenMMException;
+using TNT::Array2D;
+using JAMA::SVD;
 struct ShakeCluster {
    int centralID;
@@ -464,7 +467,7 @@ static void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster
    }
 }
-static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, const vector<int>& secondAtom, vector<int>& constraintIndices)
+static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, const vector<int>& secondAtom, const vector<float>& invMass1, const vector<float>& invMass2, const vector<float>& distance, vector<int>& constraintIndices)
 {
    vector<map<int, int> > atomConstraints(firstAtom.size());
    for (int i = 0; i < (int)constraintIndices.size(); i++) {
@@ -583,7 +586,97 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
    while (gpu->sim.clusterShakeBlockSize < maxClusterSize)
        gpu->sim.clusterShakeBlockSize *= 2;
    psRigidClusterConstraintIndex->Upload();
-    gpu->hasInitializedRigidClusters = false;
+    // Build the inverse coupling matrix for each cluster.
+    unsigned int elementIndex = 0;
+    for (unsigned int i = 0; i < rigidClusters.size(); i++) {
+        // Compute the constraint coupling matrix for this cluster.
+        const vector<int>& cluster = rigidClusters[i];
+        unsigned int size = cluster.size();
+        Array2D<double> matrix(size, size);
+        for (int j = 0; j < (int)size; j++) {
+            for (int k = 0; k < (int)size; k++) {
+                if (j == k) {
+                    matrix[j][j] = 1.0;
+                    continue;
+                }
+                double scale;
+                int atomj0 = firstAtom[cluster[j]];
+                int atomj1 = secondAtom[cluster[j]];
+                int atomk0 = firstAtom[cluster[k]];
+                int atomk1 = secondAtom[cluster[k]];
+                int atoma, atomb;
+                if (atomj0 == atomk0) {
+                    atoma = atomj1;
+                    atomb = atomk1;
+                    scale = invMass1[cluster[j]]/(invMass1[cluster[j]]+invMass2[cluster[j]]);
+                }
+                else if (atomj1 == atomk1) {
+                    atoma = atomj0;
+                    atomb = atomk0;
+                    scale = invMass2[cluster[j]]/(invMass1[cluster[j]]+invMass2[cluster[j]]);
+                }
+                else if (atomj0 == atomk1) {
+                    atoma = atomj1;
+                    atomb = atomk0;
+                    scale = invMass1[cluster[j]]/(invMass1[cluster[j]]+invMass2[cluster[j]]);
+                }
+                else if (atomj1 == atomk0) {
+                    atoma = atomj0;
+                    atomb = atomk1;
+                    scale = invMass2[cluster[j]]/(invMass1[cluster[j]]+invMass2[cluster[j]]);
+                }
+                else {
+                    matrix[j][k] = 0.0;
+                    continue; // These constraints are not connected.
+                }
+                // Find the third constraint forming a triangle with these two.
+                for (int m = 0; m < size; m++) {
+                    int other = cluster[m];
+                    if ((firstAtom[other] == atoma && secondAtom[other] == atomb) || (firstAtom[other] == atomb && secondAtom[other] == atoma)) {
+                        double d1 = distance[cluster[j]];
+                        double d2 = distance[cluster[k]];
+                        double d3 = distance[other];
+                        matrix[j][k] = scale*(d1*d1+d2*d2-d3*d3)/(2.0*d1*d2);
+                        break;
+                    }
+                }
+            }
+        }
+        // Invert it using SVD.
+        Array2D<double> u, v;
+        Array1D<double> w;
+        SVD<double> svd(matrix);
+        svd.getU(u);
+        svd.getV(v);
+        svd.getSingularValues(w);
+        double singularValueCutoff = 0.01*w[0];
+        for (int j = 0; j < (int)size; j++)
+            w[j] = (w[j] < singularValueCutoff ? 0.0 : 1.0/w[j]);
+        for (int j = 0; j < (int)size; j++) {
+            for (int k = 0; k < (int)size; k++) {
+                matrix[j][k] = 0.0;
+                for (int m = 0; m < (int)size; m++)
+                    matrix[j][k] += v[j][m]*w[m]*u[k][m];
+            }
+        }
+        // Record the inverted matrix.
+        (*gpu->psRigidClusterMatrixIndex)[i] = elementIndex;
+        for (int j = 0; j < (int)size; j++)
+            for (int k = 0; k < (int)size; k++)
+                (*gpu->psRigidClusterMatrix)[elementIndex++] = (float)(matrix[k][j]*distance[cluster[j]]/distance[cluster[k]]);
+    }
+    (*gpu->psRigidClusterMatrixIndex)[gpu->sim.rigidClusters] = elementIndex;
+    gpu->psRigidClusterMatrix->Upload();
+    gpu->psRigidClusterMatrixIndex->Upload();
 }
 extern "C"
@@ -791,7 +884,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
    // Identify rigid clusters of atoms.
-    findRigidClusters(gpu, atom1, atom2, lincsConstraints);
+    findRigidClusters(gpu, atom1, atom2, invMass1, invMass2, distance, lincsConstraints);
    // Record the connections between constraints.

--- a/platforms/cuda/src/kernels/gputypes.h
+++ b/platforms/cuda/src/kernels/gputypes.h
@@ -75,7 +75,6 @@ struct _gpuContext {
    bool bRecalculateBornRadii;
    bool bOutputBufferPerWarp;
    bool bIncludeGBSA;
-    bool hasInitializedRigidClusters;
    unsigned long seed;
    SM_VERSION sm_version;
    CUDPPHandle cudpp;

--- a/platforms/cuda/src/kernels/kCShake.cu
+++ b/platforms/cuda/src/kernels/kCShake.cu
@@ -27,12 +27,9 @@
 #include <cuda.h>
 #include <vector_functions.h>
 #include <vector>
-#include "jama_svd.h"
 #include "gputypes.h"
 using namespace std;
-using TNT::Array2D;
-using JAMA::SVD;
 static __constant__ cudaGmxSimulation cSim;
@@ -204,127 +201,13 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
        cSim.pSyncCounter[blockIdx.x] = -1;
 }
-static void initInverseMatrices(gpuContext gpu, bool useNewPositions)
-{
-    // Build the inverse constraint matrix for each cluster.
-    gpu->psPosq4->Download();
-    gpu->psVelm4->Download();
-    if (useNewPositions)
-        gpu->psPosqP4->Download();
-    unsigned int elementIndex = 0;
-    for (unsigned int i = 0; i < gpu->sim.rigidClusters; i++) {
-        // Compute the constraint coupling matrix for this cluster.
-        unsigned int startIndex = (*gpu->psRigidClusterConstraintIndex)[i];
-        unsigned int endIndex = (*gpu->psRigidClusterConstraintIndex)[i+1];
-        unsigned int size = endIndex-startIndex;
-        vector<float3> r(size);
-        for (unsigned int j = 0; j < size; j++) {
-            int2 atoms = (*gpu->psLincsAtoms)[startIndex+j];
-            float4 pos1, pos2;
-            if (useNewPositions) {
-                float4 oldpos1 = (*gpu->psPosq4)[atoms.x];
-                float4 oldpos2 = (*gpu->psPosq4)[atoms.y];
-                pos1 = (*gpu->psPosqP4)[atoms.x];
-                pos2 = (*gpu->psPosqP4)[atoms.y];
-                pos1.x += oldpos1.x;
-                pos1.y += oldpos1.y;
-                pos1.z += oldpos1.z;
-                pos2.x += oldpos2.x;
-                pos2.y += oldpos2.y;
-                pos2.z += oldpos2.z;
-            }
-            else {
-                pos1 = (*gpu->psPosq4)[atoms.x];
-                pos2 = (*gpu->psPosq4)[atoms.y];
-            }
-            r[j] = make_float3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
-            float invLength = 1.0f/sqrt(r[j].x*r[j].x + r[j].y*r[j].y + r[j].z*r[j].z);
-            r[j].x *= invLength;
-            r[j].y *= invLength;
-            r[j].z *= invLength;
-        }
-        Array2D<double> matrix(size, size);
-        for (int j = 0; j < (int)size; j++) {
-            int2 atomsj = (*gpu->psLincsAtoms)[startIndex+j];
-            for (int k = 0; k < (int)size; k++) {
-                int2 atomsk = (*gpu->psLincsAtoms)[startIndex+k];
-                float invMassj0 = (*gpu->psVelm4)[atomsj.x].w;
-                float invMassj1 = (*gpu->psVelm4)[atomsj.y].w;
-                double dot = r[j].x*r[k].x + r[j].y*r[k].y + r[j].z*r[k].z;
-                if (atomsj.x == atomsk.x)
-                    dot *= invMassj0/(invMassj0+invMassj1);
-                else if (atomsj.y == atomsk.y)
-                    dot *= invMassj1/(invMassj0+invMassj1);
-                else if (atomsj.x == atomsk.y)
-                    dot *= -invMassj0/(invMassj0+invMassj1);
-                else if (atomsj.y == atomsk.x)
-                    dot *= -invMassj1/(invMassj0+invMassj1);
-                else
-                    dot = 0.0;
-                matrix[j][k] = dot;
-            }
-            matrix[j][j] = 1.0;
-        }
-        // Invert it using SVD.
-        Array2D<double> u, v;
-        Array1D<double> w;
-        SVD<double> svd(matrix);
-        svd.getU(u);
-        svd.getV(v);
-        svd.getSingularValues(w);
-        double singularValueCutoff = 0.01*w[0];
-        for (int j = 0; j < (int)size; j++)
-            w[j] = (w[j] < singularValueCutoff ? 0.0 : 1.0/w[j]);
-        for (int j = 0; j < (int)size; j++) {
-            for (int k = 0; k < (int)size; k++) {
-                matrix[j][k] = 0.0;
-                for (int m = 0; m < (int)size; m++)
-                    matrix[j][k] += v[j][m]*w[m]*u[k][m];
-            }
-        }
-        // Record the inverted matrix.
-        (*gpu->psRigidClusterMatrixIndex)[i] = elementIndex;
-        for (int j = 0; j < (int)size; j++)
-        {
-            float distance1 = (*gpu->psLincsDistance)[startIndex+j].w;
-            for (int k = 0; k < (int)size; k++)
-            {
-                float distance2 = (*gpu->psLincsDistance)[startIndex+k].w;
-                (*gpu->psRigidClusterMatrix)[elementIndex++] = (float)(matrix[k][j]*distance1/distance2);
-            }
-        }
-    }
-    (*gpu->psRigidClusterMatrixIndex)[gpu->sim.rigidClusters] = elementIndex;
-    gpu->psRigidClusterMatrix->Upload();
-    gpu->psRigidClusterMatrixIndex->Upload();
-}
 void kApplyFirstCShake(gpuContext gpu)
 {
 //    printf("kApplyFirstCShake\n");
    if (gpu->sim.lincsConstraints > 0)
    {
-        if (!gpu->hasInitializedRigidClusters)
-        {
-            // Build preliminary constraint matrices for use on this call.
-            initInverseMatrices(gpu, false);
-        }
        kApplyCShake_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block, 4*gpu->sim.lincs_threads_per_block>>>(gpu->sim.pPosqP, true);
        LAUNCHERROR("kApplyCShake");
-        if (!gpu->hasInitializedRigidClusters)
-        {
-            // Rebuild the constraint matrices, now that we know all constraints are really satisfied.
-            initInverseMatrices(gpu, true);
-            gpu->hasInitializedRigidClusters = true;
-        }
    }
 }