Unverified Commit d9756688 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1992 from peastman/arrays

Simplification to CudaArray and OpenCLArray
parents 979525c7 c75dba47
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
public:
OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
CalcDrudeForceKernel(name, platform), cl(cl), particleParams(NULL), pairParams(NULL) {
CalcDrudeForceKernel(name, platform), cl(cl) {
}
~OpenCLCalcDrudeForceKernel();
/**
* Initialize the kernel.
*
......@@ -73,8 +72,8 @@ public:
void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
private:
OpenCLContext& cl;
OpenCLArray* particleParams;
OpenCLArray* pairParams;
OpenCLArray particleParams;
OpenCLArray pairParams;
};
/**
......@@ -83,9 +82,8 @@ private:
class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
public:
OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false), normalParticles(NULL), pairParticles(NULL) {
IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
}
~OpenCLIntegrateDrudeLangevinStepKernel();
/**
* Initialize the kernel.
*
......@@ -112,8 +110,8 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
double prevStepSize;
OpenCLArray* normalParticles;
OpenCLArray* pairParticles;
OpenCLArray normalParticles;
OpenCLArray pairParticles;
cl::Kernel kernel1, kernel2, hardwallKernel;
};
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
}
}
CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cu.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies();
......@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
int paddedParticles = cu.getPaddedNumAtoms();
bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
forces.initialize<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
positions.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) {
vector<double4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<double4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_double4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
else {
vector<float4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<float4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = make_float4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
// Build a list of contractions.
......@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
}
}
if (maxContractedCopies > 0) {
contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
contractedForces.initialize<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
contractedPositions.initialize(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
}
// Create kernels.
......@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
float frictionFloat = (float) friction;
void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
void* pileArgs[] = {&velocities.getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
if (integrator.getApplyThermostat())
cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);
// Update positions and velocities.
void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr};
void* stepArgs[] = {&positions.getDevicePointer(), &velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr, kTPtr};
cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);
// Calculate forces based on the updated positions.
......@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// Update velocities.
void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr};
void* velocitiesArgs[] = {&velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr};
cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);
// Apply the PILE-L thermostat again.
......@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
// the same translation to all the beads.
int i = numCopies-1;
void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* args[] = {&positions.getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(translateKernel, args, cu.getNumAtoms());
}
}
......@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
for (int i = 0; i < numCopies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites();
......@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2])
throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator. Use RPMDMonteCarloBarostat instead.");
context.calcForcesAndEnergy(true, false, groupsNotContracted);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
}
......@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Find the contracted positions.
void* contractPosArgs[] = {&positions->getDevicePointer(), &contractedPositions->getDevicePointer()};
void* contractPosArgs[] = {&positions.getDevicePointer(), &contractedPositions.getDevicePointer()};
cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize);
// Compute forces.
for (int i = 0; i < copies; i++) {
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
context.computeVirtualSites();
context.calcForcesAndEnergy(true, false, groupFlags);
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
&velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
}
// Apply the forces to the original copies.
void* contractForceArgs[] = {&forces->getDevicePointer(), &contractedForces->getDevicePointer()};
void* contractForceArgs[] = {&forces.getDevicePointer(), &contractedForces.getDevicePointer()};
cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize);
}
if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
int i = numCopies-1;
void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
}
......@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
}
void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
......@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
}
else if (cu.getUseMixedPrecision()) {
vector<float4> posqf(cu.getPaddedNumAtoms());
......@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
vector<double4> posq(cu.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++)
posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
}
else {
vector<float4> posq(cu.getPaddedNumAtoms());
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w);
result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
}
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<positions->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
str<<"Error uploading array "<<positions.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL)
if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
......@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
}
else {
vector<float4> velm(cu.getPaddedNumAtoms());
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w);
result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
}
if (result != CUDA_SUCCESS) {
std::stringstream str;
str<<"Error uploading array "<<velocities->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
str<<"Error uploading array "<<velocities.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(str.str());
}
}
void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
void* copyArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
void* copyArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy};
cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms());
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -46,9 +46,8 @@ namespace OpenMM {
class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
IntegrateRPMDStepKernel(name, platform), cu(cu) {
}
~CudaIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
......@@ -91,11 +90,11 @@ private:
int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies;
int groupsNotContracted;
CudaArray* forces;
CudaArray* positions;
CudaArray* velocities;
CudaArray* contractedForces;
CudaArray* contractedPositions;
CudaArray forces;
CudaArray positions;
CudaArray velocities;
CudaArray contractedForces;
CudaArray contractedPositions;
CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, CUfunction> positionContractionKernels;
std::map<int, CUfunction> forceContractionKernels;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -41,19 +41,6 @@
using namespace OpenMM;
using namespace std;
OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
if (forces != NULL)
delete forces;
if (positions != NULL)
delete positions;
if (velocities != NULL)
delete velocities;
if (contractedForces != NULL)
delete contractedForces;
if (contractedPositions != NULL)
delete contractedPositions;
}
void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system);
numCopies = integrator.getNumCopies();
......@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms();
int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
forces = new OpenCLArray(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
forces.initialize(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4));
positions = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
positions.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
velocities.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
if (useDoublePrecision) {
vector<mm_double4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<mm_double4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_double4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
else {
vector<mm_float4> temp(positions->getSize());
for (int i = 0; i < positions->getSize(); i++)
vector<mm_float4> temp(positions.getSize());
for (int i = 0; i < positions.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 0);
positions->upload(temp);
for (int i = 0; i < velocities->getSize(); i++)
positions.upload(temp);
for (int i = 0; i < velocities.getSize(); i++)
temp[i] = mm_float4(0, 0, 0, 1);
velocities->upload(temp);
velocities.upload(temp);
}
// Build a list of contractions.
......@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
}
}
if (maxContractedCopies > 0) {
contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
contractedForces.initialize(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
contractedPositions.initialize(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
}
// Create kernels.
......@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
hasInitializedKernel = true;
pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
pileKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(1, velocities.getDeviceBuffer());
stepKernel.setArg<cl::Buffer>(2, forces.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
velocitiesKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(3, velocities.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
for (auto& g : groupsByCopies) {
int copies = g.first;
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(0, positions.getDeviceBuffer());
positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(0, forces.getDeviceBuffer());
forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
}
}
......@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Compute forces from all groups that didn't have a specified contraction.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, positions.getDeviceBuffer());
for (int i = 0; i < numCopies; i++) {
copyToContextKernel.setArg<cl_int>(5, i);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
......@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
// Now loop over contractions and compute forces from them.
if (groupsByCopies.size() > 0) {
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions.getDeviceBuffer());
for (auto& g : groupsByCopies) {
int copies = g.first;
int groupFlags = g.second;
......@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
if (groupsByCopies.size() > 0) {
// Ensure the Context contains the positions from the last copy, since we'll assume that later.
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, numCopies-1);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
......@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
}
void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
if (positions == NULL)
if (!positions.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
......@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
}
else if (cl.getUseMixedPrecision()) {
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
......@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
vector<mm_double4> posq(cl.getPaddedNumAtoms());
for (int i = 0; i < numParticles; i++)
posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
}
else {
vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
}
}
void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
if (velocities == NULL)
if (!velocities.isInitialized())
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
......@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
}
else {
vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
}
}
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
if (!hasInitializedKernel)
initializeKernels(context);
copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(5, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2013 Stanford University and the Authors. *
* Portions copyright (c) 2011-2018 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,9 +45,8 @@ namespace OpenMM {
class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
public:
OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false) {
}
~OpenCLIntegrateRPMDStepKernel();
/**
* Initialize the kernel.
*
......@@ -92,11 +91,11 @@ private:
int numCopies, numParticles, workgroupSize;
std::map<int, int> groupsByCopies;
int groupsNotContracted;
OpenCLArray* forces;
OpenCLArray* positions;
OpenCLArray* velocities;
OpenCLArray* contractedForces;
OpenCLArray* contractedPositions;
OpenCLArray forces;
OpenCLArray positions;
OpenCLArray velocities;
OpenCLArray contractedForces;
OpenCLArray contractedPositions;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
std::map<int, cl::Kernel> positionContractionKernels;
std::map<int, cl::Kernel> forceContractionKernels;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment