Commit 04b90973 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed several bugs in LINCS

parent 8a83a2fb
...@@ -1641,6 +1641,13 @@ static void findMoleculeGroups(gpuContext gpu) ...@@ -1641,6 +1641,13 @@ static void findMoleculeGroups(gpuContext gpu)
constraints.push_back(Constraint(atom1, atom3, distance12*distance12)); constraints.push_back(Constraint(atom1, atom3, distance12*distance12));
constraints.push_back(Constraint(atom2, atom3, distance23*distance23)); constraints.push_back(Constraint(atom2, atom3, distance23*distance23));
} }
for (int i = 0; i < gpu->sim.lincsConstraints; i++)
{
int atom1 = (*gpu->psLincsAtoms)[i].x;
int atom2 = (*gpu->psLincsAtoms)[i].y;
float distance2 = (*gpu->psLincsDistance)[i].w;
constraints.push_back(Constraint(atom1, atom2, distance2));
}
// First make a list of every other atom to which each atom is connect by a bond or constraint. // First make a list of every other atom to which each atom is connect by a bond or constraint.
......
...@@ -29,13 +29,8 @@ ...@@ -29,13 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h> #include <cuda.h>
#include <vector_functions.h> #include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
//#include <fstream>
using namespace std; using namespace std;
#include "gputypes.h" #include "gputypes.h"
...@@ -57,48 +52,10 @@ void GetLincsSim(gpuContext gpu) ...@@ -57,48 +52,10 @@ void GetLincsSim(gpuContext gpu)
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed"); RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
} }
/** __global__ void kUpdateAtomPositions_kernel(float4* atomPositions)
* Synchronize all threads across all blocks.
*/
__device__ void kSyncAllThreads_kernel(unsigned int* syncCounter)
{ {
__syncthreads(); // Update the atom positions based on the solution to the matrix equations.
if (threadIdx.x == 0)
atomicInc(syncCounter, gridDim.x-1);
__shared__ int counterValue;
do
{
if (threadIdx.x == 0)
counterValue = *syncCounter;
} while (counterValue > 0);
}
__device__ void kSolveMatrix_kernel(unsigned int* syncCounter)
{
for (unsigned int iteration = 0; iteration < cSim.lincsTerms; iteration++) {
float* rhs1 = (iteration%2 == 0 ? cSim.pLincsRhs1 : cSim.pLincsRhs2);
float* rhs2 = (iteration%2 == 0 ? cSim.pLincsRhs2 : cSim.pLincsRhs1);
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.lincsConstraints)
{
float rhs = 0.0f;
int start = cSim.pLincsConnectionsIndex[pos];
int end = cSim.pLincsConnectionsIndex[pos+1];
for (int i = start; i < end; i++)
{
int otherConstraint = cSim.pLincsConnections[i];
rhs += cSim.pLincsCoupling[i]*rhs1[otherConstraint];
}
rhs2[pos] = rhs;
cSim.pLincsSolution[pos] += rhs;
pos += blockDim.x * gridDim.x;
}
kSyncAllThreads_kernel(&syncCounter[iteration]);
}
}
__device__ void kUpdateAtomPositions_kernel(float4* atomPositions)
{
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.atoms) while (pos < cSim.atoms)
{ {
...@@ -121,7 +78,30 @@ __device__ void kUpdateAtomPositions_kernel(float4* atomPositions) ...@@ -121,7 +78,30 @@ __device__ void kUpdateAtomPositions_kernel(float4* atomPositions)
} }
} }
__global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition) __global__ void kIterateLincsMatrix_kernel(int iteration)
{
// Perform one iteration of inverting the matrix.
float* rhs1 = (iteration%2 == 0 ? cSim.pLincsRhs1 : cSim.pLincsRhs2);
float* rhs2 = (iteration%2 == 0 ? cSim.pLincsRhs2 : cSim.pLincsRhs1);
unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.lincsConstraints)
{
float rhs = 0.0f;
int start = cSim.pLincsConnectionsIndex[pos];
int end = cSim.pLincsConnectionsIndex[pos+1];
for (int i = start; i < end; i++)
{
int otherConstraint = cSim.pLincsConnections[i];
rhs += cSim.pLincsCoupling[i]*rhs1[otherConstraint];
}
rhs2[pos] = rhs;
cSim.pLincsSolution[pos] += rhs;
pos += blockDim.x * gridDim.x;
}
}
__global__ void kApplyLincsPart1_kernel(float4* atomPositions, bool addOldPosition)
{ {
// Calculate the direction of each constraint, along with the initial RHS and solution vectors. // Calculate the direction of each constraint, along with the initial RHS and solution vectors.
...@@ -156,11 +136,13 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition) ...@@ -156,11 +136,13 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition)
cSim.pLincsSolution[pos] = diff; cSim.pLincsSolution[pos] = diff;
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
kSyncAllThreads_kernel(cSim.pSyncCounter); }
__global__ void kApplyLincsPart2_kernel()
{
// Build the coupling matrix. // Build the coupling matrix.
pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.lincsConstraints) while (pos < cSim.lincsConstraints)
{ {
float4 dir1 = cSim.pLincsDistance[pos]; float4 dir1 = cSim.pLincsDistance[pos];
...@@ -179,16 +161,13 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition) ...@@ -179,16 +161,13 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition)
} }
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
}
// Solve the matrix equation and update the atom positions. __global__ void kApplyLincsPart3_kernel(float4* atomPositions, bool addOldPosition)
{
kSolveMatrix_kernel(cSim.pSyncCounter+1);
kUpdateAtomPositions_kernel(atomPositions);
kSyncAllThreads_kernel(cSim.pSyncCounter+cSim.lincsTerms+1);
// Correct for rotational lengthening. // Correct for rotational lengthening.
pos = threadIdx.x + blockIdx.x * blockDim.x; unsigned int pos = threadIdx.x + blockIdx.x * blockDim.x;
while (pos < cSim.lincsConstraints) while (pos < cSim.lincsConstraints)
{ {
int2 atoms = cSim.pLincsAtoms[pos]; int2 atoms = cSim.pLincsAtoms[pos];
...@@ -215,29 +194,42 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition) ...@@ -215,29 +194,42 @@ __global__ void kApplyLincs_kernel(float4* atomPositions, bool addOldPosition)
cSim.pLincsSolution[pos] = diff; cSim.pLincsSolution[pos] = diff;
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
}
// Solve the matrix equation and update the atom positions. static void kApplyLincs(gpuContext gpu, float4* atomPositions, bool addOldPosition)
{
kSolveMatrix_kernel(cSim.pSyncCounter+cSim.lincsTerms+2); kApplyLincsPart1_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(atomPositions, addOldPosition);
kUpdateAtomPositions_kernel(atomPositions); LAUNCHERROR("kApplyLincsPart1");
kApplyLincsPart2_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>();
LAUNCHERROR("kApplyLincsPart2");
for (int i = 0; i < gpu->sim.lincsTerms; ++i)
{
kIterateLincsMatrix_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(i);
LAUNCHERROR("kIterateLincsMatrix_kernel");
}
kUpdateAtomPositions_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(atomPositions);
LAUNCHERROR("kUpdateAtomPositions");
kApplyLincsPart3_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(atomPositions, addOldPosition);
LAUNCHERROR("kApplyLincsPart3");
for (int i = 0; i < gpu->sim.lincsTerms; ++i)
{
kIterateLincsMatrix_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(i);
LAUNCHERROR("kIterateLincsMatrix_kernel");
}
kUpdateAtomPositions_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(atomPositions);
LAUNCHERROR("kUpdateAtomPositions");
} }
void kApplyFirstLincs(gpuContext gpu) void kApplyFirstLincs(gpuContext gpu)
{ {
// printf("kApplyFirstLincs\n"); // printf("kApplyFirstLincs\n");
if (gpu->sim.lincsConstraints > 0) if (gpu->sim.lincsConstraints > 0)
{ kApplyLincs(gpu, gpu->sim.pPosqP, true);
kApplyLincs_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(gpu->sim.pPosqP, true);
LAUNCHERROR("kApplyFirstLincs");
}
} }
void kApplySecondLincs(gpuContext gpu) void kApplySecondLincs(gpuContext gpu)
{ {
// printf("kApplySecondLincs\n"); // printf("kApplySecondLincs\n");
if (gpu->sim.lincsConstraints > 0) if (gpu->sim.lincsConstraints > 0)
{ kApplyLincs(gpu, gpu->sim.pPosq, false);
kApplyLincs_kernel<<<gpu->sim.blocks, gpu->sim.lincs_threads_per_block>>>(gpu->sim.pPosq, false);
LAUNCHERROR("kApplySecondLincs");
}
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment