/** * Compute the direction each constraint is pointing in. This is called once at the beginning of constraint evaluation. */ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions, __global int* converged) { for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) { // Compute the direction for this constraint. int2 atoms = constraintAtoms[index]; float4 dir = constraintDistance[index]; float4 oldPos1 = atomPositions[atoms.x]; float4 oldPos2 = atomPositions[atoms.y]; dir.x = oldPos1.x-oldPos2.x; dir.y = oldPos1.y-oldPos2.y; dir.z = oldPos1.z-oldPos2.z; constraintDistance[index] = dir; } // Mark that no work groups have converged yet. for (int index = get_global_id(0); index < get_num_groups(0); index += get_global_size(0)) converged[index] = false; } /** * Compute the force applied by each constraint. */ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global float4* constraintDistance, __global float4* atomPositions, __global float* reducedMass, __global float* delta1, __global int* converged, __local int* convergedBuffer, float tol) { if (converged[get_group_id(0)]) return; // The constraint iteration has already converged. float lowerTol = 1.0f-2.0f*tol+tol*tol; float upperTol = 1.0f+2.0f*tol+tol*tol; int threadConverged = 1; for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) { // Compute the force due to this constraint. int2 atoms = constraintAtoms[index]; float4 dir = constraintDistance[index]; float4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y]; rp_ij.xyz += dir.xyz; float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z; float dist2 = dir.w*dir.w; float diff = dist2 - rp2; float rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z; float d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z; delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f); // See whether it has converged. threadConverged &= (rp2 >= lowerTol*dist2 && rp2 <= upperTol*dist2); } // Perform a parallel reduction to see if all constraints handled by this work group have converged. convergedBuffer[get_local_id(0)] = threadConverged; barrier(CLK_LOCAL_MEM_FENCE); for (int step = 1; step < get_local_size(0); step *= 2) { if (get_local_id(0)%(2*step) == 0) convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step]; barrier(CLK_LOCAL_MEM_FENCE); } if (get_local_id(0) == 0) converged[get_group_id(0)] = convergedBuffer[0]; } /** * Multiply the vector of constraint forces by the constraint matrix. */ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float* delta2, __global int* constraintMatrixColumn, __global float* constraintMatrixValue, __global int* converged, __local int* convergedBuffer) { // First see whether all work groups have converged. convergedBuffer[get_local_id(0)] = true; for (int index = get_local_id(0); index < get_num_groups(0); index += get_local_size(0)) convergedBuffer[get_local_id(0)] &= converged[index]; barrier(CLK_LOCAL_MEM_FENCE); for (int step = 1; step < get_local_size(0); step *= 2) { if (get_local_id(0)%(2*step) == 0) convergedBuffer[get_local_id(0)] &= convergedBuffer[get_local_id(0)+step]; barrier(CLK_LOCAL_MEM_FENCE); } if (get_local_id(0) == 0) converged[get_group_id(0)] = convergedBuffer[0]; if (converged[0]) return; // The constraint iteration has already converged. // Multiply by the inverse constraint matrix. for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) { float sum = 0.0f; for (int i = 0; ; i++) { int element = index+i*NUM_CONSTRAINTS; int column = constraintMatrixColumn[element]; if (column >= NUM_CONSTRAINTS) break; sum += delta1[column]*constraintMatrixValue[element]; } delta2[index] = sum; } } /** * Update the atom positions based on constraint forces. */ __kernel void updateAtomPositions(__global int* numAtomConstraints, __global int* atomConstraints, __global float4* constraintDistance, __global float4* atomPositions, __global float4* velm, __global float* delta1, __global float* delta2, __global int* converged, int iteration) { if (converged[get_group_id(0)]) return; // The constraint iteration has already converged. float damping = (iteration < 2 ? 0.5f : 1.0f); for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) { // Compute the new position of this atom. float4 atomPos = atomPositions[index]; float invMass = velm[index].w; int num = numAtomConstraints[index]; for (int i = 0; i < num; i++) { int constraint = atomConstraints[index+i*NUM_ATOMS]; bool forward = (constraint > 0); constraint = (forward ? constraint-1 : -constraint-1); float constraintForce = damping*invMass*delta2[constraint]; constraintForce = (forward ? constraintForce : -constraintForce); float4 dir = constraintDistance[constraint]; atomPos.x += constraintForce*dir.x; atomPos.y += constraintForce*dir.y; atomPos.z += constraintForce*dir.z; } atomPositions[index] = atomPos; } }