Commit 403e4744 authored by Peter Eastman
Browse files

Fixes to CustomIntegrator

parent 9f717609
...@@ -7109,6 +7109,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -7109,6 +7109,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
needsForces[step-1] = (needsForces[step] || needsForces[step-1]); needsForces[step-1] = (needsForces[step] || needsForces[step-1]);
needsEnergy[step-1] = (needsEnergy[step] || needsEnergy[step-1]); needsEnergy[step-1] = (needsEnergy[step] || needsEnergy[step-1]);
needsGlobals[step-1] = (needsGlobals[step] || needsGlobals[step-1]); needsGlobals[step-1] = (needsGlobals[step] || needsGlobals[step-1]);
computeBothForceAndEnergy[step-1] = (computeBothForceAndEnergy[step] || computeBothForceAndEnergy[step-1]);
} }
// Loop over all steps and create the kernels for them. // Loop over all steps and create the kernels for them.
...@@ -7187,7 +7188,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -7187,7 +7188,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(NULL); args1.push_back(NULL);
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
args1.push_back(&energy); args1.push_back(&energy);
else else
args1.push_back(&energyFloat); args1.push_back(&energyFloat);
...@@ -7282,7 +7283,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -7282,7 +7283,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer()); kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer());
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
kineticEnergyArgs.push_back(&energy); kineticEnergyArgs.push_back(&energy);
else else
kineticEnergyArgs.push_back(&energyFloat); kineticEnergyArgs.push_back(&energyFloat);
......
...@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) { ...@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta, extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues,
const real energy, mixed* __restrict__ energyParamDerivs const mixed energy, mixed* __restrict__ energyParamDerivs
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
......
...@@ -7398,6 +7398,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7398,6 +7398,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
needsForces[step-1] = (needsForces[step] || needsForces[step-1]); needsForces[step-1] = (needsForces[step] || needsForces[step-1]);
needsEnergy[step-1] = (needsEnergy[step] || needsEnergy[step-1]); needsEnergy[step-1] = (needsEnergy[step] || needsEnergy[step-1]);
needsGlobals[step-1] = (needsGlobals[step] || needsGlobals[step-1]); needsGlobals[step-1] = (needsGlobals[step] || needsGlobals[step-1]);
computeBothForceAndEnergy[step-1] = (computeBothForceAndEnergy[step] || computeBothForceAndEnergy[step-1]);
} }
// Loop over all steps and create the kernels for them. // Loop over all steps and create the kernels for them.
...@@ -7568,7 +7569,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7568,7 +7569,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 2; index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kineticEnergyKernel.setArg<cl_double>(index++, 0.0); kineticEnergyKernel.setArg<cl_double>(index++, 0.0);
else else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f); kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
...@@ -7717,7 +7718,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7717,7 +7718,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step])); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
kernels[step][0].setArg<cl_float>(11, (cl_float) energy); kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
...@@ -7735,7 +7736,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -7735,7 +7736,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step])); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer()); kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
kernels[step][0].setArg<cl_double>(11, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
kernels[step][0].setArg<cl_float>(11, (cl_float) energy); kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
......
...@@ -26,7 +26,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec ...@@ -26,7 +26,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta, __kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, __global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues,
const real energy, __global mixed* restrict energyParamDerivs const mixed energy, __global mixed* restrict energyParamDerivs
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = get_global_id(0); int index = get_global_id(0);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment