Beginnings of mixed/double precision support in OpenCL

8d6a2a01 · Peter Eastman · a3d5f834 · 8d6a2a01 · 8d6a2a01 · 8d6a2a01
Commit 8d6a2a01 authored Oct 16, 2012 by Peter Eastman
6 changed files
--- a/platforms/opencl/src/kernels/shakeHydrogens.cl
+++ b/platforms/opencl/src/kernels/shakeHydrogens.cl
+mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) { // Load element `index` of posq and return it as a mixed4.
+#ifdef USE_MIXED_PRECISION
+    real4 pos1 = posq[index]; // main stored value
+    real4 pos2 = posqCorrection[index]; // correction term that is added to x, y and z below
+    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w); // correction is cast to mixed before the add (presumably mixed is wider than real; both types are defined outside this diff); w comes from posq only
+#else
+    return posq[index]; // posqCorrection is not read in this configuration
+#endif
+}
+
 /**
 * Enforce constraints on SHAKE clusters
 */

-__kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
+__kernel void applyShakeToHydrogens(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.

        int4 atoms = clusterAtoms[index];
        float4 params = clusterParams[index];
-        float4 pos = oldPos[atoms.x];
-        float4 xpi = posDelta[atoms.x];
-        float4 pos1 = oldPos[atoms.y];
-        float4 xpj1 = posDelta[atoms.y];
-        float4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
-        float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
+        mixed4 pos = loadPos(oldPos, posCorrection, atoms.x);
+        mixed4 xpi = posDelta[atoms.x];
+        mixed4 pos1 = loadPos(oldPos, posCorrection, atoms.y);
+        mixed4 xpj1 = posDelta[atoms.y];
+        mixed4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
+        mixed4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
        float invMassCentral = params.x;
        float avgMass = params.y;
        float d2 = params.z;
        float invMassPeripheral = params.w;
        if (atoms.z != -1) {
-            pos2 = oldPos[atoms.z];
+            pos2 = loadPos(oldPos, posCorrection, atoms.z);
            xpj2 = posDelta[atoms.z];
        }
-        float4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
-        float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
+        mixed4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
+        mixed4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
        if (atoms.w != -1) {
-            pos3 = oldPos[atoms.w];
+            pos3 = loadPos(oldPos, posCorrection, atoms.w);
            xpj3 = posDelta[atoms.w];
        }

        // Precompute quantities.

-        float4 rij1 = pos-pos1;
-        float4 rij2 = pos-pos2;
-        float4 rij3 = pos-pos3;
-        float rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
-        float rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
-        float rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
-        float ld1 = d2-rij1sq;
-        float ld2 = d2-rij2sq;
-        float ld3 = d2-rij3sq;
+        mixed4 rij1 = pos-pos1;
+        mixed4 rij2 = pos-pos2;
+        mixed4 rij3 = pos-pos3;
+        mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
+        mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
+        mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
+        mixed ld1 = d2-rij1sq;
+        mixed ld2 = d2-rij2sq;
+        mixed ld3 = d2-rij3sq;

        // Iterate until convergence.

@@ -49,10 +59,10 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
        while (iteration < 15 && !converged) {
            converged = true;
 #ifdef CONSTRAIN_VELOCITIES
-            float4 rpij = xpi-xpj1;
-            float rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
-            float delta = -2.0f*avgMass*rrpr/rij1sq;
-            float4 dr = rij1*delta;
+            mixed4 rpij = xpi-xpj1;
+            mixed rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
+            mixed delta = -2.0f*avgMass*rrpr/rij1sq;
+            mixed4 dr = rij1*delta;
            xpi.xyz += dr.xyz*invMassCentral;
            xpj1.xyz -= dr.xyz*invMassPeripheral;
            if (fabs(delta) > tol)
@@ -78,13 +88,13 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
                    converged = false;
            }
 #else
-            float4 rpij = xpi-xpj1;
-            float rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
-            float rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
-            float diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
+            mixed4 rpij = xpi-xpj1;
+            mixed rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
+            mixed rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
+            mixed diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
            if (diff >= 1.0f) {
-                float acor  = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
-                float4 dr = rij1*acor;
+                mixed acor  = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
+                mixed4 dr = rij1*acor;
                xpi.xyz += dr.xyz*invMassCentral;
                xpj1.xyz -= dr.xyz*invMassPeripheral;
                converged = false;
@@ -95,8 +105,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
                rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
                diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
-                    float acor  = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
-                    float4 dr = rij2*acor;
+                    mixed acor  = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
+                    mixed4 dr = rij2*acor;
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj2.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;
@@ -108,8 +118,8 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
                rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
                diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
                if (diff >= 1.0f) {
-                    float acor  = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
-                    float4 dr = rij3*acor;
+                    mixed acor  = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
+                    mixed4 dr = rij3*acor;
                    xpi.xyz += dr.xyz*invMassCentral;
                    xpj3.xyz -= dr.xyz*invMassPeripheral;
                    converged = false;

--- a/platforms/opencl/src/kernels/verlet.cl
+++ b/platforms/opencl/src/kernels/verlet.cl
-#ifdef SUPPORTS_DOUBLE_PRECISION
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#endif
-
 /**
 * Perform the first step of verlet integration.
 */

-__kernel void integrateVerletPart1(int numAtoms, __global const float2* restrict dt, __global const float4* restrict posq, __global float4* restrict velm, __global const float4* restrict force, __global float4* restrict posDelta) {
-    float2 stepSize = dt[0];
-    float dtPos = stepSize.y;
-    float dtVel = 0.5f*(stepSize.x+stepSize.y);
+__kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
+    mixed2 stepSize = dt[0];
+    mixed dtPos = stepSize.y;
+    mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
    int index = get_global_id(0);
    while (index < numAtoms) {
-        float4 velocity = velm[index];
+        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
-            float4 pos = posq[index];
-            velocity.xyz += force[index].xyz*dtVel*velocity.w;
+#ifdef USE_MIXED_PRECISION
+            real4 pos1 = posq[index];
+            real4 pos2 = posqCorrection[index];
+            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+            real4 pos = posq[index];
+#endif
+            velocity.x += force[index].x*dtVel*velocity.w;
+            velocity.y += force[index].y*dtVel*velocity.w;
+            velocity.z += force[index].z*dtVel*velocity.w;
            pos.xyz = velocity.xyz*dtPos;
            posDelta[index] = pos;
            velm[index] = velocity;
@@ -28,8 +32,8 @@ __kernel void integrateVerletPart1(int numAtoms, __global const float2* restrict
 * Perform the second step of verlet integration.
 */

-__kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, __global float4* restrict posq, __global float4* restrict velm, __global const float4* restrict posDelta) {
-    float2 stepSize = dt[0];
+__kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
+    mixed2 stepSize = dt[0];
 #ifdef SUPPORTS_DOUBLE_PRECISION
    double oneOverDt = 1.0/stepSize.y;
 #else
@@ -40,17 +44,28 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
    barrier(CLK_LOCAL_MEM_FENCE);
    int index = get_global_id(0);
    while (index < numAtoms) {
-        float4 velocity = velm[index];
+        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
-            float4 pos = posq[index];
-            float4 delta = posDelta[index];
+#ifdef USE_MIXED_PRECISION
+            real4 pos1 = posq[index];
+            real4 pos2 = posqCorrection[index];
+            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+            real4 pos = posq[index];
+#endif
+            mixed4 delta = posDelta[index];
            pos.xyz += delta.xyz;
 #ifdef SUPPORTS_DOUBLE_PRECISION
-            velocity.xyz = convert_float4(convert_double4(delta)*oneOverDt).xyz;
+            velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
 #else
            velocity.xyz = delta.xyz*oneOverDt;
 #endif
+#ifdef USE_MIXED_PRECISION
+            posq[index] = convert_real4(pos);
+            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
+#else
            posq[index] = pos;
+#endif
            velm[index] = velocity;
        }
        index += get_global_size(0);
@@ -61,14 +76,14 @@ __kernel void integrateVerletPart2(int numAtoms, __global float2* restrict dt, _
 * Select the step size to use for the next step.
 */

-__kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorTol, __global float2* restrict dt, __global const float4* restrict velm, __global const float4* restrict force, __local float* restrict error) {
+__kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
    // Calculate the error.

-    float err = 0.0f;
+    mixed err = 0;
    int index = get_local_id(0);
    while (index < numAtoms) {
-        float4 f = force[index];
-        float invMass = velm[index].w;
+        real4 f = force[index];
+        mixed invMass = velm[index].w;
        err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
        index += get_global_size(0);
    }
@@ -83,9 +98,9 @@ __kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorT
        barrier(CLK_LOCAL_MEM_FENCE);
    }
    if (get_local_id(0) == 0) {
-        float totalError = sqrt(error[0]/(numAtoms*3));
-        float newStepSize = sqrt(errorTol/totalError);
-        float oldStepSize = dt[0].y;
+        mixed totalError = sqrt(error[0]/(numAtoms*3));
+        mixed newStepSize = sqrt(errorTol/totalError);
+        mixed oldStepSize = dt[0].y;
        if (oldStepSize > 0.0f)
            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)

--- a/platforms/opencl/src/kernels/virtualSites.cl
+++ b/platforms/opencl/src/kernels/virtualSites.cl
+/**
+ * Load the position of a particle.
+ *
+ * When USE_MIXED_PRECISION is defined, the x, y and z components are each the
+ * sum of the posq entry and the matching posqCorrection entry, with the
+ * correction cast to mixed before the addition; w is copied from posq alone.
+ * Otherwise the posq entry is returned unchanged and posqCorrection is not read.
+ *
+ * @param posq            array holding the main position values
+ * @param posqCorrection  array holding the per-particle correction terms
+ * @param index           element to load from both arrays
+ * @return the loaded value as a mixed4
+ */
+mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
+#ifdef USE_MIXED_PRECISION
+    real4 pos1 = posq[index];
+    real4 pos2 = posqCorrection[index];
+    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+    return posq[index];
+#endif
+}
+
+/**
+ * Store the position of a particle.
+ *
+ * When USE_MIXED_PRECISION is defined, every component of pos is cast to real
+ * and written to posq, and posqCorrection receives, for x, y and z, what is
+ * left after subtracting that cast value from the original component (its w
+ * is set to 0). Otherwise pos is written straight to posq and posqCorrection
+ * is not touched.
+ *
+ * @param posq            array receiving the main position values
+ * @param posqCorrection  array receiving the per-particle correction terms
+ * @param index           element to write in both arrays
+ * @param pos             value to store
+ */
+void storePos(__global real4* restrict posq, __global real4* restrict posqCorrection, int index, mixed4 pos) {
+#ifdef USE_MIXED_PRECISION
+    posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w); // part of pos that survives the cast to real
+    posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0); // remainder lost by that cast; loadPos adds it back to the posq value
+#else
+    posq[index] = pos;
+#endif
+}
+
 /**
 * Compute the positions of virtual sites
 */
-__kernel void computeVirtualSites(__global float4* restrict posq, __global const int4* restrict avg2Atoms, __global const float2* restrict avg2Weights,
-        __global const int4* restrict avg3Atoms, __global const float4* restrict avg3Weights,
-        __global const int4* restrict outOfPlaneAtoms, __global const float4* restrict outOfPlaneWeights) {
+__kernel void computeVirtualSites(__global real4* restrict posq, __global real4* restrict posqCorrection, __global const int4* restrict avg2Atoms,
+        __global const real2* restrict avg2Weights, __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
+        __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights) { // posq/posqCorrection are accessed only through loadPos and storePos; in every section atoms.x is the element that gets rewritten
    
    // Two particle average sites.
    
    for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg2Atoms[index];
-        float2 weights = avg2Weights[index];
-        float4 pos = posq[atoms.x];
-        float4 pos1 = posq[atoms.y];
-        float4 pos2 = posq[atoms.z];
+        real2 weights = avg2Weights[index]; // x scales pos1, y scales pos2
+        mixed4 pos = loadPos(posq, posqCorrection, atoms.x); // current value of the site; only xyz is overwritten below, so its w is passed on to storePos
+        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y); // first particle of the weighted sum
+        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z); // second particle of the weighted sum
        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y;
-        posq[atoms.x] = pos;
+        storePos(posq, posqCorrection, atoms.x, pos); // write the weighted sum back to element atoms.x
    }
    
    // Three particle average sites.
    
    for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg3Atoms[index];
-        float4 weights = avg3Weights[index];
-        float4 pos = posq[atoms.x];
-        float4 pos1 = posq[atoms.y];
-        float4 pos2 = posq[atoms.z];
-        float4 pos3 = posq[atoms.w];
+        real4 weights = avg3Weights[index]; // x, y, z scale pos1, pos2, pos3; w is not used in this loop
+        mixed4 pos = loadPos(posq, posqCorrection, atoms.x); // current value of the site; its w is kept
+        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y); // first particle of the weighted sum
+        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z); // second particle of the weighted sum
+        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w); // third particle of the weighted sum
        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y + pos3.xyz*weights.z;
-        posq[atoms.x] = pos;
+        storePos(posq, posqCorrection, atoms.x, pos); // write the weighted sum back to element atoms.x
    }
    
    // Out of plane sites.
    
    for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
        int4 atoms = outOfPlaneAtoms[index];
-        float4 weights = outOfPlaneWeights[index];
-        float4 pos = posq[atoms.x];
-        float4 pos1 = posq[atoms.y];
-        float4 pos2 = posq[atoms.z];
-        float4 pos3 = posq[atoms.w];
-        float4 v12 = pos2-pos1;
-        float4 v13 = pos3-pos1;
+        real4 weights = outOfPlaneWeights[index]; // x scales v12, y scales v13, z scales cross(v12, v13)
+        mixed4 pos = loadPos(posq, posqCorrection, atoms.x); // current value of the site; its w is kept
+        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y); // origin for the two difference vectors below
+        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
+        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
+        mixed4 v12 = pos2-pos1; // vector from pos1 to pos2
+        mixed4 v13 = pos3-pos1; // vector from pos1 to pos3
        pos.xyz = pos1.xyz + v12.xyz*weights.x + v13.xyz*weights.y + cross(v12, v13).xyz*weights.z;
-        posq[atoms.x] = pos;
+        storePos(posq, posqCorrection, atoms.x, pos); // write the result back to element atoms.x
    }
 }

 /**
 * Distribute forces from virtual sites to the atoms they are based on.
 */
-__kernel void distributeForces(__global const float4* restrict posq, __global float4* restrict force,
-        __global const int4* restrict avg2Atoms, __global const float2* restrict avg2Weights,
-        __global const int4* restrict avg3Atoms, __global const float4* restrict avg3Weights,
-        __global const int4* restrict outOfPlaneAtoms, __global const float4* restrict outOfPlaneWeights) {
+__kernel void distributeForces(__global const real4* restrict posq, __global real4* restrict posqCorrection, __global real4* restrict force,
+        __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
+        __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
+        __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights) {
    
    // Two particle average sites.
    
    for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg2Atoms[index];
-        float2 weights = avg2Weights[index];
-        float4 f = force[atoms.x];
-        float4 f1 = force[atoms.y];
-        float4 f2 = force[atoms.z];
+        real2 weights = avg2Weights[index];
+        real4 f = force[atoms.x];
+        real4 f1 = force[atoms.y];
+        real4 f2 = force[atoms.z];
        f1.xyz += f.xyz*weights.x;
        f2.xyz += f.xyz*weights.y;
        force[atoms.y] = f1;
@@ -72,11 +97,11 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
    
    for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
        int4 atoms = avg3Atoms[index];
-        float4 weights = avg3Weights[index];
-        float4 f = force[atoms.x];
-        float4 f1 = force[atoms.y];
-        float4 f2 = force[atoms.z];
-        float4 f3 = force[atoms.w];
+        real4 weights = avg3Weights[index];
+        real4 f = force[atoms.x];
+        real4 f1 = force[atoms.y];
+        real4 f2 = force[atoms.z];
+        real4 f3 = force[atoms.w];
        f1.xyz += f.xyz*weights.x;
        f2.xyz += f.xyz*weights.y;
        f3.xyz += f.xyz*weights.z;
@@ -89,20 +114,20 @@ __kernel void distributeForces(__global const float4* restrict posq, __global fl
    
    for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
        int4 atoms = outOfPlaneAtoms[index];
-        float4 weights = outOfPlaneWeights[index];
-        float4 pos1 = posq[atoms.y];
-        float4 pos2 = posq[atoms.z];
-        float4 pos3 = posq[atoms.w];
-        float4 v12 = pos2-pos1;
-        float4 v13 = pos3-pos1;
-        float4 f = force[atoms.x];
-        float4 f1 = force[atoms.y];
-        float4 f2 = force[atoms.z];
-        float4 f3 = force[atoms.w];
-        float4 fp2 = (float4) (weights.x*f.x - weights.z*v13.z*f.y + weights.z*v13.y*f.z,
+        real4 weights = outOfPlaneWeights[index];
+        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
+        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
+        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
+        mixed4 v12 = pos2-pos1;
+        mixed4 v13 = pos3-pos1;
+        real4 f = force[atoms.x];
+        real4 f1 = force[atoms.y];
+        real4 f2 = force[atoms.z];
+        real4 f3 = force[atoms.w];
+        real4 fp2 = (real4) (weights.x*f.x - weights.z*v13.z*f.y + weights.z*v13.y*f.z,
                   weights.z*v13.z*f.x + weights.x*f.y - weights.z*v13.x*f.z,
                  -weights.z*v13.y*f.x + weights.z*v13.x*f.y + weights.x*f.z, 0.0f);
-        float4 fp3 = (float4) (weights.y*f.x + weights.z*v12.z*f.y - weights.z*v12.y*f.z,
+        real4 fp3 = (real4) (weights.y*f.x + weights.z*v12.z*f.y - weights.z*v12.y*f.z,
                  -weights.z*v12.z*f.x + weights.y*f.y + weights.z*v12.x*f.z,
                   weights.z*v12.y*f.x - weights.z*v12.x*f.y + weights.y*f.z, 0.0f);
        f1.xyz += f.xyz-fp2.xyz-fp3.xyz;

--- a/platforms/opencl/tests/TestOpenCLFFT.cpp
+++ b/platforms/opencl/tests/TestOpenCLFFT.cpp
@@ -51,7 +51,7 @@ using namespace std;
 void testTransform() {
    System system;
    system.addParticle(0.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "");
+    OpenCLPlatform::PlatformData platformData(system, "", "", "single");
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    OpenMM_SFMT::SFMT sfmt;

--- a/platforms/opencl/tests/TestOpenCLRandom.cpp
+++ b/platforms/opencl/tests/TestOpenCLRandom.cpp
@@ -48,7 +48,7 @@ void testGaussian() {
    System system;
    for (int i = 0; i < numAtoms; i++)
        system.addParticle(1.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "");
+    OpenCLPlatform::PlatformData platformData(system, "", "", "single");
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);

--- a/platforms/opencl/tests/TestOpenCLSort.cpp
+++ b/platforms/opencl/tests/TestOpenCLSort.cpp
@@ -62,7 +62,7 @@ void verifySorting(vector<float> array) {

    System system;
    system.addParticle(0.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "");
+    OpenCLPlatform::PlatformData platformData(system, "", "", "single");
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    OpenCLArray data(context, array.size(), sizeof(float), "sortData");