"platforms/reference/include/ReferenceNoseHooverChain.h" did not exist on "d9941f475d4a2d64e7a5c8573ee143b1d9694ce4"
Commit 6f1f89d8 authored by peastman's avatar peastman
Browse files

Merge pull request #883 from peastman/customint

Fixed bug in CustomIntegrator
parents d4964400 4cf66cab
......@@ -1282,7 +1282,8 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context);
CudaContext& cu;
double prevStepSize;
double prevStepSize, energy;
float energyFloat;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent;
......@@ -1303,7 +1304,7 @@ private:
std::vector<std::vector<CUfunction> > kernels;
std::vector<std::vector<std::vector<void*> > > kernelArgs;
std::vector<void*> kineticEnergyArgs;
CUfunction sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces;
std::vector<bool> needsEnergy;
......
......@@ -5731,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
......@@ -5767,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables["m"] = "mass";
variables["dt"] = "stepSize";
if (energyName != "")
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
......@@ -6000,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(NULL);
args1.push_back(NULL);
args1.push_back(NULL);
args1.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
args1.push_back(&energy);
else
args1.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
args1.push_back(&perDofValues->getBuffers()[i].getMemory());
kernelArgs[step].push_back(args1);
......@@ -6049,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args.push_back(&contextParameterValues->getDevicePointer());
args.push_back(NULL);
args.push_back(NULL);
args.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
args.push_back(&energy);
else
args.push_back(&energyFloat);
kernelArgs[step].push_back(args);
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
......@@ -6089,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Create the kernel for summing the potential energy.
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(cu.getEnergyBuffer().getSize());
CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumPotentialEnergyKernel = cu.getKernel(module, cu.getUseDoublePrecision() ? "computeDoubleSum" : "computeFloatSum");
// Create the kernel for computing kinetic energy.
stringstream computeKE;
......@@ -6117,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args << ", " << buffer.getType() << "* __restrict__ " << valueName;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA");
module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
kineticEnergyKernel = cu.getKernel(module, "computePerDof");
kineticEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
kineticEnergyArgs.push_back(NULL);
......@@ -6134,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer());
kineticEnergyArgs.push_back(&potentialEnergy->getDevicePointer());
if (cu.getUseDoublePrecision())
kineticEnergyArgs.push_back(&energy);
else
kineticEnergyArgs.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
keNeedsForce = usesVariable(keExpression, "f");
......@@ -6240,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
else {
recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy) {
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
energyFloat = (float) energy;
}
forcesAreValid = true;
}
......@@ -6315,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
bool willNeedEnergy = false;
for (int i = 0; i < integrator.getNumComputations(); i++)
willNeedEnergy |= needsEnergy[i];
context.calcForcesAndEnergy(true, willNeedEnergy, -1);
if (willNeedEnergy) {
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
energyFloat = (float) energy;
forcesAreValid = true;
}
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
......
extern "C" __global__ void computeGlobal(mixed2* __restrict__ dt, mixed* __restrict__ globals, mixed* __restrict__ params,
float uniform, float gaussian, const real* __restrict__ energy) {
float uniform, float gaussian, const real energy) {
COMPUTE_STEP
}
......@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x;
......
......@@ -1273,7 +1273,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context);
OpenCLContext& cl;
double prevStepSize;
double prevStepSize, energy;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent;
......@@ -1293,7 +1293,7 @@ private:
std::vector<double> contextValuesDouble;
std::vector<float> contextValues;
std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces;
std::vector<bool> needsEnergy;
......
......@@ -5993,7 +5993,7 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
......@@ -6029,7 +6029,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables["m"] = "mass";
variables["dt"] = "stepSize";
if (energyName != "")
variables[energyName] = "energy[0]";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
......@@ -6259,8 +6259,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 3;
kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
index += 4;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
if (stepType[step] == CustomIntegrator::ComputeSum) {
......@@ -6305,8 +6304,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
index += 2;
kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// Apply position constraints.
......@@ -6347,16 +6344,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
randomKernel.setArg<cl::Buffer>(1, uniformRandoms->getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer());
// Create the kernel for summing the potential energy.
cl::Program program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
sumPotentialEnergyKernel = cl::Kernel(program, cl.getUseDoublePrecision() ? "computeDoubleSum" : "computeFloatSum");
int index = 0;
sumPotentialEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
sumPotentialEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
sumPotentialEnergyKernel.setArg<cl_int>(index++, 0);
sumPotentialEnergyKernel.setArg<cl_int>(index++, cl.getEnergyBuffer().getSize());
// Create the kernel for computing kinetic energy.
stringstream computeKE;
......@@ -6380,9 +6367,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
replacements["PARAMETER_ARGUMENTS"] = args.str();
if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA");
program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customIntegratorPerDof, replacements), defines);
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customIntegratorPerDof, replacements), defines);
kineticEnergyKernel = cl::Kernel(program, "computePerDof");
index = 0;
int index = 0;
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kineticEnergyKernel, index++);
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getPosDelta().getDeviceBuffer());
......@@ -6394,7 +6381,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
if (cl.getUseDoublePrecision())
kineticEnergyKernel.setArg<cl_double>(index++, 0.0);
else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
keNeedsForce = usesVariable(keExpression, "f");
......@@ -6501,9 +6491,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
}
else {
recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy)
cl.executeKernel(sumPotentialEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
forcesAreValid = true;
}
}
......@@ -6511,6 +6499,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(12, energy);
else
kernels[i][0].setArg<cl_float>(12, (cl_float) energy);
if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms);
cl.executeKernel(kernels[i][0], numAtoms);
......@@ -6518,12 +6510,20 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
kernels[i][0].setArg<cl_float>(3, (cl_float) SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
kernels[i][0].setArg<cl_float>(4, (cl_float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(5, energy);
else
kernels[i][0].setArg<cl_float>(5, (cl_float) energy);
cl.executeKernel(kernels[i][0], 1, 1);
}
else if (stepType[i] == CustomIntegrator::ComputeSum) {
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(12, energy);
else
kernels[i][0].setArg<cl_float>(12, (cl_float) energy);
if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer);
......@@ -6573,9 +6573,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
bool willNeedEnergy = false;
for (int i = 0; i < integrator.getNumComputations(); i++)
willNeedEnergy |= needsEnergy[i];
context.calcForcesAndEnergy(true, willNeedEnergy, -1);
if (willNeedEnergy)
cl.executeKernel(sumPotentialEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
forcesAreValid = true;
}
cl.clearBuffer(*sumBuffer);
......
__kernel void computeGlobal(__global mixed2* restrict dt, __global mixed* restrict globals, __global mixed* restrict params,
float uniform, float gaussian, __global const real* restrict energy) {
float uniform, float gaussian, const real energy) {
COMPUTE_STEP
}
......@@ -26,7 +26,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues,
unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, __global const real* restrict energy
unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy
PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y;
int index = get_global_id(0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment