"platforms/opencl/vscode:/vscode.git/clone" did not exist on "81f266834348d1bfa7a6506c62a05c06760710bf"
Commit 8d6a2a01 authored by Peter Eastman
Browse files

Beginnings of mixed/double precision support in OpenCL

parent a3d5f834
/**
 * Read the position of the particle at `index` and return it as a mixed4.
 *
 * When USE_MIXED_PRECISION is defined, the x, y and z components are the sum of the
 * entry in posq and the matching entry in posqCorrection, and w comes from posq alone.
 * Otherwise the posq entry is returned as is and posqCorrection is never read.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
    const real4 stored = posq[index];
#ifdef USE_MIXED_PRECISION
    const real4 correction = posqCorrection[index];
    mixed4 combined;
    // Widen the correction before adding so the sum is carried out in the mixed type.
    combined.x = stored.x+(mixed) correction.x;
    combined.y = stored.y+(mixed) correction.y;
    combined.z = stored.z+(mixed) correction.z;
    combined.w = stored.w;
    return combined;
#else
    return stored;
#endif
}
/**
* Enforce constraints on SHAKE clusters
*/
__kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
__kernel void applyShakeToHydrogens(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
int index = get_global_id(0);
while (index < numClusters) {
// Load the data for this cluster.
int4 atoms = clusterAtoms[index];
float4 params = clusterParams[index];
float4 pos = oldPos[atoms.x];
float4 xpi = posDelta[atoms.x];
float4 pos1 = oldPos[atoms.y];
float4 xpj1 = posDelta[atoms.y];
float4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
mixed4 pos = loadPos(oldPos, posCorrection, atoms.x);
mixed4 xpi = posDelta[atoms.x];
mixed4 pos1 = loadPos(oldPos, posCorrection, atoms.y);
mixed4 xpj1 = posDelta[atoms.y];
mixed4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
mixed4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
float invMassCentral = params.x;
float avgMass = params.y;
float d2 = params.z;
float invMassPeripheral = params.w;
if (atoms.z != -1) {
pos2 = oldPos[atoms.z];
pos2 = loadPos(oldPos, posCorrection, atoms.z);
xpj2 = posDelta[atoms.z];
}
float4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
mixed4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
mixed4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
if (atoms.w != -1) {
pos3 = oldPos[atoms.w];
pos3 = loadPos(oldPos, posCorrection, atoms.w);
xpj3 = posDelta[atoms.w];
}
// Precompute quantities.
float4 rij1 = pos-pos1;
float4 rij2 = pos-pos2;
float4 rij3 = pos-pos3;
float rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
float rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
float rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
float ld1 = d2-rij1sq;
float ld2 = d2-rij2sq;
float ld3 = d2-rij3sq;
mixed4 rij1 = pos-pos1;
mixed4 rij2 = pos-pos2;
mixed4 rij3 = pos-pos3;
mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
mixed ld1 = d2-rij1sq;
mixed ld2 = d2-rij2sq;
mixed ld3 = d2-rij3sq;
// Iterate until convergence.
......@@ -49,10 +59,10 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
while (iteration < 15 && !converged) {
converged = true;
#ifdef CONSTRAIN_VELOCITIES
float4 rpij = xpi-xpj1;
float rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
float delta = -2.0f*avgMass*rrpr/rij1sq;
float4 dr = rij1*delta;
mixed4 rpij = xpi-xpj1;
mixed rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
mixed delta = -2.0f*avgMass*rrpr/rij1sq;
mixed4 dr = rij1*delta;
xpi.xyz += dr.xyz*invMassCentral;
xpj1.xyz -= dr.xyz*invMassPeripheral;
if (fabs(delta) > tol)
......@@ -78,13 +88,13 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
converged = false;
}
#else
float4 rpij = xpi-xpj1;
float rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
float rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
float diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
mixed4 rpij = xpi-xpj1;
mixed rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
mixed rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
mixed diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
if (diff >= 1.0f) {
float acor = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
float4 dr = rij1*acor;
mixed acor = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
mixed4 dr = rij1*acor;
xpi.xyz += dr.xyz*invMassCentral;
xpj1.xyz -= dr.xyz*invMassPeripheral;
converged = false;
......@@ -95,8 +105,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
if (diff >= 1.0f) {
float acor = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
float4 dr = rij2*acor;
mixed acor = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
mixed4 dr = rij2*acor;
xpi.xyz += dr.xyz*invMassCentral;
xpj2.xyz -= dr.xyz*invMassPeripheral;
converged = false;
......@@ -108,8 +118,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
if (diff >= 1.0f) {
float acor = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
float4 dr = rij3*acor;
mixed acor = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
mixed4 dr = rij3*acor;
xpi.xyz += dr.xyz*invMassCentral;
xpj3.xyz -= dr.xyz*invMassPeripheral;
converged = false;
......
#ifdef SUPPORTS_DOUBLE_PRECISION
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
/**
* Perform the first step of verlet integration.
*/
__kernel void integrateVerletPart1(int numAtoms, __global const float2* restrict dt, __global const float4* restrict posq, __global float4* restrict velm, __global const float4* restrict force, __global float4* restrict posDelta) {
float2 stepSize = dt[0];
float dtPos = stepSize.y;
float dtVel = 0.5f*(stepSize.x+stepSize.y);
__kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
mixed2 stepSize = dt[0];
mixed dtPos = stepSize.y;
mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
int index = get_global_id(0);
while (index < numAtoms) {
float4 velocity = velm[index];
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
float4 pos = posq[index];
velocity.xyz += force[index].xyz*dtVel*velocity.w;
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
velocity.x += force[index].x*dtVel*velocity.w;
velocity.y += force[index].y*dtVel*velocity.w;
velocity.z += force[index].z*dtVel*velocity.w;
pos.xyz = velocity.xyz*dtPos;
posDelta[index] = pos;
velm[index] = velocity;
......@@ -28,8 +32,8 @@ __kernel void integrateVerletPart1(int numAtoms, __global const float2* restrict
* Perform the second step of verlet integration.
*/
__kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, __global float4* restrict posq, __global float4* restrict velm, __global const float4* restrict posDelta) {
float2 stepSize = dt[0];
__kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
mixed2 stepSize = dt[0];
#ifdef SUPPORTS_DOUBLE_PRECISION
double oneOverDt = 1.0/stepSize.y;
#else
......@@ -40,17 +44,28 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
barrier(CLK_LOCAL_MEM_FENCE);
int index = get_global_id(0);
while (index < numAtoms) {
float4 velocity = velm[index];
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
float4 pos = posq[index];
float4 delta = posDelta[index];
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
real4 pos = posq[index];
#endif
mixed4 delta = posDelta[index];
pos.xyz += delta.xyz;
#ifdef SUPPORTS_DOUBLE_PRECISION
velocity.xyz = convert_float4(convert_double4(delta)*oneOverDt).xyz;
velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
#else
velocity.xyz = delta.xyz*oneOverDt;
#endif
#ifdef USE_MIXED_PRECISION
posq[index] = convert_real4(pos);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
velm[index] = velocity;
}
index += get_global_size(0);
......@@ -61,14 +76,14 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
* Select the step size to use for the next step.
*/
__kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorTol, __global float2* restrict dt, __global const float4* restrict velm, __global const float4* restrict force, __local float* restrict error) {
__kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
// Calculate the error.
float err = 0.0f;
mixed err = 0;
int index = get_local_id(0);
while (index < numAtoms) {
float4 f = force[index];
float invMass = velm[index].w;
real4 f = force[index];
mixed invMass = velm[index].w;
err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
index += get_global_size(0);
}
......@@ -83,9 +98,9 @@ __kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorT
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0) {
float totalError = sqrt(error[0]/(numAtoms*3));
float newStepSize = sqrt(errorTol/totalError);
float oldStepSize = dt[0].y;
mixed totalError = sqrt(error[0]/(numAtoms*3));
mixed newStepSize = sqrt(errorTol/totalError);
mixed oldStepSize = dt[0].y;
if (oldStepSize > 0.0f)
newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
......
/**
 * Read the position of the particle at `index` and return it as a mixed4.
 *
 * When USE_MIXED_PRECISION is defined, the x, y and z components are the sum of the
 * entry in posq and the matching entry in posqCorrection, and w comes from posq alone.
 * Otherwise the posq entry is returned as is and posqCorrection is never read.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
    const real4 stored = posq[index];
#ifdef USE_MIXED_PRECISION
    const real4 correction = posqCorrection[index];
    mixed4 combined;
    // Widen the correction before adding so the sum is carried out in the mixed type.
    combined.x = stored.x+(mixed) correction.x;
    combined.y = stored.y+(mixed) correction.y;
    combined.z = stored.z+(mixed) correction.z;
    combined.w = stored.w;
    return combined;
#else
    return stored;
#endif
}
/**
 * Write the position `pos` of the particle at `index` back to global memory.
 *
 * When USE_MIXED_PRECISION is defined, posq receives each component cast to `real`,
 * and posqCorrection receives, for x, y and z, the difference between the original
 * component and its `real` cast (its w is set to 0). Otherwise pos is written straight
 * to posq and posqCorrection is left untouched.
 */
void storePos(__global real4* restrict posq, __global real4* restrict posqCorrection, int index, mixed4 pos) {
#ifdef USE_MIXED_PRECISION
    // The part of the position that fits in the storage type.
    const real4 rounded = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
    // What the cast dropped, kept so loadPos() can add it back.
    const real4 residual = (real4) (pos.x-rounded.x, pos.y-rounded.y, pos.z-rounded.z, 0);
    posq[index] = rounded;
    posqCorrection[index] = residual;
#else
    posq[index] = pos;
#endif
}
/**
* Compute the positions of virtual sites
*/
__kernel void computeVirtualSites(__global float4* restrict posq, __global const int4* restrict avg2Atoms, __global const float2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const float4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const float4* restrict outOfPlaneWeights) {
__kernel void computeVirtualSites(__global real4* restrict posq, __global real4* restrict posqCorrection, __global const int4* restrict avg2Atoms,
__global const real2* restrict avg2Weights, __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights) {
// Two particle average sites.
for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
int4 atoms = avg2Atoms[index];
float2 weights = avg2Weights[index];
float4 pos = posq[atoms.x];
float4 pos1 = posq[atoms.y];
float4 pos2 = posq[atoms.z];
real2 weights = avg2Weights[index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y;
posq[atoms.x] = pos;
storePos(posq, posqCorrection, atoms.x, pos);
}
// Three particle average sites.
for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
int4 atoms = avg3Atoms[index];
float4 weights = avg3Weights[index];
float4 pos = posq[atoms.x];
float4 pos1 = posq[atoms.y];
float4 pos2 = posq[atoms.z];
float4 pos3 = posq[atoms.w];
real4 weights = avg3Weights[index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y + pos3.xyz*weights.z;
posq[atoms.x] = pos;
storePos(posq, posqCorrection, atoms.x, pos);
}
// Out of plane sites.
for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
int4 atoms = outOfPlaneAtoms[index];
float4 weights = outOfPlaneWeights[index];
float4 pos = posq[atoms.x];
float4 pos1 = posq[atoms.y];
float4 pos2 = posq[atoms.z];
float4 pos3 = posq[atoms.w];
float4 v12 = pos2-pos1;
float4 v13 = pos3-pos1;
real4 weights = outOfPlaneWeights[index];
mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
mixed4 v12 = pos2-pos1;
mixed4 v13 = pos3-pos1;
pos.xyz = pos1.xyz + v12.xyz*weights.x + v13.xyz*weights.y + cross(v12, v13).xyz*weights.z;
posq[atoms.x] = pos;
storePos(posq, posqCorrection, atoms.x, pos);
}
}
/**
* Distribute forces from virtual sites to the atoms they are based on.
*/
__kernel void distributeForces(__global const float4* restrict posq, __global float4* restrict force,
__global const int4* restrict avg2Atoms, __global const float2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const float4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const float4* restrict outOfPlaneWeights) {
__kernel void distributeForces(__global const real4* restrict posq, __global real4* restrict posqCorrection, __global real4* restrict force,
__global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
__global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
__global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights) {
// Two particle average sites.
for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
int4 atoms = avg2Atoms[index];
float2 weights = avg2Weights[index];
float4 f = force[atoms.x];
float4 f1 = force[atoms.y];
float4 f2 = force[atoms.z];
real2 weights = avg2Weights[index];
real4 f = force[atoms.x];
real4 f1 = force[atoms.y];
real4 f2 = force[atoms.z];
f1.xyz += f.xyz*weights.x;
f2.xyz += f.xyz*weights.y;
force[atoms.y] = f1;
......@@ -72,11 +97,11 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
int4 atoms = avg3Atoms[index];
float4 weights = avg3Weights[index];
float4 f = force[atoms.x];
float4 f1 = force[atoms.y];
float4 f2 = force[atoms.z];
float4 f3 = force[atoms.w];
real4 weights = avg3Weights[index];
real4 f = force[atoms.x];
real4 f1 = force[atoms.y];
real4 f2 = force[atoms.z];
real4 f3 = force[atoms.w];
f1.xyz += f.xyz*weights.x;
f2.xyz += f.xyz*weights.y;
f3.xyz += f.xyz*weights.z;
......@@ -89,20 +114,20 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
int4 atoms = outOfPlaneAtoms[index];
float4 weights = outOfPlaneWeights[index];
float4 pos1 = posq[atoms.y];
float4 pos2 = posq[atoms.z];
float4 pos3 = posq[atoms.w];
float4 v12 = pos2-pos1;
float4 v13 = pos3-pos1;
float4 f = force[atoms.x];
float4 f1 = force[atoms.y];
float4 f2 = force[atoms.z];
float4 f3 = force[atoms.w];
float4 fp2 = (float4) (weights.x*f.x - weights.z*v13.z*f.y + weights.z*v13.y*f.z,
real4 weights = outOfPlaneWeights[index];
mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
mixed4 v12 = pos2-pos1;
mixed4 v13 = pos3-pos1;
real4 f = force[atoms.x];
real4 f1 = force[atoms.y];
real4 f2 = force[atoms.z];
real4 f3 = force[atoms.w];
real4 fp2 = (real4) (weights.x*f.x - weights.z*v13.z*f.y + weights.z*v13.y*f.z,
weights.z*v13.z*f.x + weights.x*f.y - weights.z*v13.x*f.z,
-weights.z*v13.y*f.x + weights.z*v13.x*f.y + weights.x*f.z, 0.0f);
float4 fp3 = (float4) (weights.y*f.x + weights.z*v12.z*f.y - weights.z*v12.y*f.z,
real4 fp3 = (real4) (weights.y*f.x + weights.z*v12.z*f.y - weights.z*v12.y*f.z,
-weights.z*v12.z*f.x + weights.y*f.y + weights.z*v12.x*f.z,
weights.z*v12.y*f.x - weights.z*v12.x*f.y + weights.y*f.z, 0.0f);
f1.xyz += f.xyz-fp2.xyz-fp3.xyz;
......
......@@ -51,7 +51,7 @@ using namespace std;
void testTransform() {
System system;
system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "");
OpenCLPlatform::PlatformData platformData(system, "", "", "single");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenMM_SFMT::SFMT sfmt;
......
......@@ -48,7 +48,7 @@ void testGaussian() {
System system;
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "");
OpenCLPlatform::PlatformData platformData(system, "", "", "single");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -62,7 +62,7 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "");
OpenCLPlatform::PlatformData platformData(system, "", "", "single");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment