Commit 4cf66cab authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bug in CustomIntegrator

parent afae4bc8
...@@ -1282,7 +1282,8 @@ private: ...@@ -1282,7 +1282,8 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
CudaContext& cu; CudaContext& cu;
double prevStepSize; double prevStepSize, energy;
float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce; bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
...@@ -1303,7 +1304,7 @@ private: ...@@ -1303,7 +1304,7 @@ private:
std::vector<std::vector<CUfunction> > kernels; std::vector<std::vector<CUfunction> > kernels;
std::vector<std::vector<std::vector<void*> > > kernelArgs; std::vector<std::vector<std::vector<void*> > > kernelArgs;
std::vector<void*> kineticEnergyArgs; std::vector<void*> kineticEnergyArgs;
CUfunction sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel; CUfunction randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces; std::vector<bool> needsForces;
std::vector<bool> needsEnergy; std::vector<bool> needsEnergy;
......
...@@ -5731,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari ...@@ -5731,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
variables["dt"] = "dt[0].y"; variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform"; variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian"; variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
...@@ -5767,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari ...@@ -5767,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables["m"] = "mass"; variables["m"] = "mass";
variables["dt"] = "stepSize"; variables["dt"] = "stepSize";
if (energyName != "") if (energyName != "")
variables[energyName] = "energy[0]"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
...@@ -6000,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6000,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(NULL); args1.push_back(NULL);
args1.push_back(&potentialEnergy->getDevicePointer()); if (cu.getUseDoublePrecision())
args1.push_back(&energy);
else
args1.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
args1.push_back(&perDofValues->getBuffers()[i].getMemory()); args1.push_back(&perDofValues->getBuffers()[i].getMemory());
kernelArgs[step].push_back(args1); kernelArgs[step].push_back(args1);
...@@ -6049,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6049,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args.push_back(&contextParameterValues->getDevicePointer()); args.push_back(&contextParameterValues->getDevicePointer());
args.push_back(NULL); args.push_back(NULL);
args.push_back(NULL); args.push_back(NULL);
args.push_back(&potentialEnergy->getDevicePointer()); if (cu.getUseDoublePrecision())
args.push_back(&energy);
else
args.push_back(&energyFloat);
kernelArgs[step].push_back(args); kernelArgs[step].push_back(args);
} }
else if (stepType[step] == CustomIntegrator::ConstrainPositions) { else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
...@@ -6089,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6089,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines); CUmodule randomProgram = cu.createModule(CudaKernelSources::customIntegrator, defines);
randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers"); randomKernel = cu.getKernel(randomProgram, "generateRandomNumbers");
// Create the kernel for summing the potential energy.
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(cu.getEnergyBuffer().getSize());
CUmodule module = cu.createModule(CudaKernelSources::customIntegrator, defines);
sumPotentialEnergyKernel = cu.getKernel(module, cu.getUseDoublePrecision() ? "computeDoubleSum" : "computeFloatSum");
// Create the kernel for computing kinetic energy. // Create the kernel for computing kinetic energy.
stringstream computeKE; stringstream computeKE;
...@@ -6117,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6117,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args << ", " << buffer.getType() << "* __restrict__ " << valueName; args << ", " << buffer.getType() << "* __restrict__ " << valueName;
} }
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
defines["SUM_OUTPUT_INDEX"] = "0";
defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms); defines["SUM_BUFFER_SIZE"] = cu.intToString(3*numAtoms);
if (defines.find("LOAD_POS_AS_DELTA") != defines.end()) if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA"); defines.erase("LOAD_POS_AS_DELTA");
module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines); CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customIntegratorPerDof, replacements), defines);
kineticEnergyKernel = cu.getKernel(module, "computePerDof"); kineticEnergyKernel = cu.getKernel(module, "computePerDof");
kineticEnergyArgs.push_back(&cu.getPosq().getDevicePointer()); kineticEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
...@@ -6134,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6134,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(NULL); kineticEnergyArgs.push_back(NULL);
kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer()); kineticEnergyArgs.push_back(&uniformRandoms->getDevicePointer());
kineticEnergyArgs.push_back(&potentialEnergy->getDevicePointer()); if (cu.getUseDoublePrecision())
kineticEnergyArgs.push_back(&energy);
else
kineticEnergyArgs.push_back(&energyFloat);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory()); kineticEnergyArgs.push_back(&perDofValues->getBuffers()[i].getMemory());
keNeedsForce = usesVariable(keExpression, "f"); keNeedsForce = usesVariable(keExpression, "f");
...@@ -6240,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat ...@@ -6240,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
} }
else { else {
recordChangedParameters(context); recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]); energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy) { energyFloat = (float) energy;
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
} }
forcesAreValid = true; forcesAreValid = true;
} }
...@@ -6315,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -6315,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
bool willNeedEnergy = false; bool willNeedEnergy = false;
for (int i = 0; i < integrator.getNumComputations(); i++) for (int i = 0; i < integrator.getNumComputations(); i++)
willNeedEnergy |= needsEnergy[i]; willNeedEnergy |= needsEnergy[i];
context.calcForcesAndEnergy(true, willNeedEnergy, -1); energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
if (willNeedEnergy) { energyFloat = (float) energy;
void* args[] = {&cu.getEnergyBuffer().getDevicePointer(), &potentialEnergy->getDevicePointer()};
cu.executeKernel(sumPotentialEnergyKernel, &args[0], CudaContext::ThreadBlockSize, CudaContext::ThreadBlockSize);
}
forcesAreValid = true; forcesAreValid = true;
} }
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0); CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
......
extern "C" __global__ void computeGlobal(mixed2* __restrict__ dt, mixed* __restrict__ globals, mixed* __restrict__ params, extern "C" __global__ void computeGlobal(mixed2* __restrict__ dt, mixed* __restrict__ globals, mixed* __restrict__ params,
float uniform, float gaussian, const real* __restrict__ energy) { float uniform, float gaussian, const real energy) {
COMPUTE_STEP COMPUTE_STEP
} }
...@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) { ...@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta, extern "C" __global__ void computePerDof(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta,
mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals, mixed4* __restrict__ velm, const long long* __restrict__ force, const mixed2* __restrict__ dt, const mixed* __restrict__ globals,
const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues, const mixed* __restrict__ params, mixed* __restrict__ sum, const float4* __restrict__ gaussianValues,
unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real* __restrict__ energy unsigned int gaussianBaseIndex, const float4* __restrict__ uniformValues, const real energy
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
......
...@@ -1273,7 +1273,7 @@ private: ...@@ -1273,7 +1273,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
OpenCLContext& cl; OpenCLContext& cl;
double prevStepSize; double prevStepSize, energy;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce; bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
...@@ -1293,7 +1293,7 @@ private: ...@@ -1293,7 +1293,7 @@ private:
std::vector<double> contextValuesDouble; std::vector<double> contextValuesDouble;
std::vector<float> contextValues; std::vector<float> contextValues;
std::vector<std::vector<cl::Kernel> > kernels; std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel sumPotentialEnergyKernel, randomKernel, kineticEnergyKernel, sumKineticEnergyKernel; cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<bool> needsForces; std::vector<bool> needsForces;
std::vector<bool> needsEnergy; std::vector<bool> needsEnergy;
......
...@@ -5992,7 +5992,7 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va ...@@ -5992,7 +5992,7 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
variables["dt"] = "dt[0].y"; variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform"; variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian"; variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
...@@ -6028,7 +6028,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va ...@@ -6028,7 +6028,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables["m"] = "mass"; variables["m"] = "mass";
variables["dt"] = "stepSize"; variables["dt"] = "stepSize";
if (energyName != "") if (energyName != "")
variables[energyName] = "energy[0]"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
...@@ -6258,8 +6258,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6258,8 +6258,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 3; index += 4;
kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
if (stepType[step] == CustomIntegrator::ComputeSum) { if (stepType[step] == CustomIntegrator::ComputeSum) {
...@@ -6304,8 +6303,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6304,8 +6303,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
index += 2;
kernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
} }
else if (stepType[step] == CustomIntegrator::ConstrainPositions) { else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// Apply position constraints. // Apply position constraints.
...@@ -6346,16 +6343,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6346,16 +6343,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
randomKernel.setArg<cl::Buffer>(1, uniformRandoms->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(1, uniformRandoms->getDeviceBuffer());
randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(2, randomSeed->getDeviceBuffer());
// Create the kernel for summing the potential energy.
cl::Program program = cl.createProgram(OpenCLKernelSources::customIntegrator, defines);
sumPotentialEnergyKernel = cl::Kernel(program, cl.getUseDoublePrecision() ? "computeDoubleSum" : "computeFloatSum");
int index = 0;
sumPotentialEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
sumPotentialEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer());
sumPotentialEnergyKernel.setArg<cl_int>(index++, 0);
sumPotentialEnergyKernel.setArg<cl_int>(index++, cl.getEnergyBuffer().getSize());
// Create the kernel for computing kinetic energy. // Create the kernel for computing kinetic energy.
stringstream computeKE; stringstream computeKE;
...@@ -6379,9 +6366,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6379,9 +6366,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
if (defines.find("LOAD_POS_AS_DELTA") != defines.end()) if (defines.find("LOAD_POS_AS_DELTA") != defines.end())
defines.erase("LOAD_POS_AS_DELTA"); defines.erase("LOAD_POS_AS_DELTA");
program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customIntegratorPerDof, replacements), defines); cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customIntegratorPerDof, replacements), defines);
kineticEnergyKernel = cl::Kernel(program, "computePerDof"); kineticEnergyKernel = cl::Kernel(program, "computePerDof");
index = 0; int index = 0;
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, kineticEnergyKernel, index++); setPosqCorrectionArg(cl, kineticEnergyKernel, index++);
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getPosDelta().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getPosDelta().getDeviceBuffer());
...@@ -6393,7 +6380,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6393,7 +6380,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 2; index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, potentialEnergy->getDeviceBuffer()); if (cl.getUseDoublePrecision())
kineticEnergyKernel.setArg<cl_double>(index++, 0.0);
else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory());
keNeedsForce = usesVariable(keExpression, "f"); keNeedsForce = usesVariable(keExpression, "f");
...@@ -6500,9 +6490,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6500,9 +6490,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
} }
else { else {
recordChangedParameters(context); recordChangedParameters(context);
context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]); energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]);
if (computeEnergy)
cl.executeKernel(sumPotentialEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
forcesAreValid = true; forcesAreValid = true;
} }
} }
...@@ -6510,6 +6498,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6510,6 +6498,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer()); kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer()); kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(12, energy);
else
kernels[i][0].setArg<cl_float>(12, (cl_float) energy);
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.executeKernel(kernels[i][0], numAtoms); cl.executeKernel(kernels[i][0], numAtoms);
...@@ -6517,12 +6509,20 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6517,12 +6509,20 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) { else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) {
kernels[i][0].setArg<cl_float>(3, (cl_float) SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber()); kernels[i][0].setArg<cl_float>(3, (cl_float) SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
kernels[i][0].setArg<cl_float>(4, (cl_float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); kernels[i][0].setArg<cl_float>(4, (cl_float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(5, energy);
else
kernels[i][0].setArg<cl_float>(5, (cl_float) energy);
cl.executeKernel(kernels[i][0], 1, 1); cl.executeKernel(kernels[i][0], 1, 1);
} }
else if (stepType[i] == CustomIntegrator::ComputeSum) { else if (stepType[i] == CustomIntegrator::ComputeSum) {
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i]));
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer()); kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer()); kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(12, energy);
else
kernels[i][0].setArg<cl_float>(12, (cl_float) energy);
if (requiredUniform[i] > 0) if (requiredUniform[i] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
...@@ -6572,9 +6572,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -6572,9 +6572,7 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
bool willNeedEnergy = false; bool willNeedEnergy = false;
for (int i = 0; i < integrator.getNumComputations(); i++) for (int i = 0; i < integrator.getNumComputations(); i++)
willNeedEnergy |= needsEnergy[i]; willNeedEnergy |= needsEnergy[i];
context.calcForcesAndEnergy(true, willNeedEnergy, -1); energy = context.calcForcesAndEnergy(true, willNeedEnergy, -1);
if (willNeedEnergy)
cl.executeKernel(sumPotentialEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
forcesAreValid = true; forcesAreValid = true;
} }
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
......
__kernel void computeGlobal(__global mixed2* restrict dt, __global mixed* restrict globals, __global mixed* restrict params, __kernel void computeGlobal(__global mixed2* restrict dt, __global mixed* restrict globals, __global mixed* restrict params,
float uniform, float gaussian, __global const real* restrict energy) { float uniform, float gaussian, const real energy) {
COMPUTE_STEP COMPUTE_STEP
} }
...@@ -26,7 +26,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec ...@@ -26,7 +26,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta, __kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues, __global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues,
unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, __global const real* restrict energy unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = get_global_id(0); int index = get_global_id(0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment